Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from src.about import ( | |
| CITATION_BUTTON_LABEL, | |
| CITATION_BUTTON_TEXT, | |
| EVALUATION_QUEUE_TEXT, | |
| INTRODUCTION_TEXT, | |
| LLM_BENCHMARKS_TEXT, | |
| TITLE, | |
| ) | |
| from src.display.css_html_js import custom_css | |
def load_speechiq_data():
    """Load the SpeechIQ results from CSV and prepare them for display.

    Reads ``SpeechIQ_table.csv`` (path relative to the current working
    directory), rounds the score columns to three decimals, sorts the rows by
    ``Speech IQ`` in descending order and adds a ``Rank`` column: the first
    three rows get medal emojis, every later row its 1-based position as a
    string.

    Returns:
        pandas.DataFrame: the table restricted to, and ordered as, ``Rank``,
        ``Speech IQ``, ``Remember``, ``Understand``, ``Apply``,
        ``Model Type``, ``Setup``, ``Audio Encoder``. If anything goes wrong
        while reading or processing the file, the error is printed and an
        empty frame with those columns is returned instead.
    """
    display_columns = [
        'Rank', 'Speech IQ', 'Remember', 'Understand', 'Apply',
        'Model Type', 'Setup', 'Audio Encoder',
    ]
    score_columns = ['Remember', 'Understand', 'Apply', 'Speech IQ']
    # Gold, silver and bronze for the top three rows
    medals = ('🥇', '🥈', '🥉')

    try:
        df = pd.read_csv("SpeechIQ_table.csv")

        # Round numerical columns to 3 decimal places for better display.
        # DataFrame.round with a dict skips names that are not columns.
        df = df.round(dict.fromkeys(score_columns, 3))

        # Sort by Speech IQ score in descending order
        df = df.sort_values('Speech IQ', ascending=False)

        # Add ranking: medals for the podium, plain numbers afterwards
        df['Rank'] = [
            medals[position] if position < len(medals) else str(position + 1)
            for position in range(len(df))
        ]

        # Reorder columns to put Rank first, then Speech IQ
        return df[display_columns]
    except Exception as e:
        # Deliberate best-effort: any failure (missing file, unreadable CSV,
        # missing expected column, ...) yields an empty table with the
        # expected columns so the caller still gets a usable frame.
        print(f"Error loading SpeechIQ data: {e}")
        return pd.DataFrame(columns=display_columns)
def get_top_performers(df):
    """Build the Markdown statistics table for the leaderboard.

    Args:
        df: leaderboard frame; the ``Speech IQ``, ``Model Type`` and ``Setup``
            columns are read.

    Returns:
        str: ``"No data available."`` for an empty frame, otherwise a Markdown
        table with the top setup, the highest Speech IQ, the best score among
        rows whose ``Model Type`` contains ``Agentic`` / ``End2End`` (``0``
        when no row matches) and the number of rows.
    """
    if df.empty:
        return "No data available."

    iq_scores = df['Speech IQ']
    top_score = iq_scores.max()
    top_model = df.loc[iq_scores.idxmax()]

    def best_score_for(keyword):
        # Highest Speech IQ among rows whose Model Type mentions `keyword`
        matching = df[df['Model Type'].str.contains(keyword, na=False)]
        if matching.empty:
            return 0
        return matching['Speech IQ'].max()

    agentic_best = best_score_for('Agentic')
    end2end_best = best_score_for('End2End')

    return f"""
## π Leaderboard Statistics
| Metric | Value |
|--------|-------|
| π **Top Performer** | {top_model['Setup']} |
| π― **Highest Score** | **{top_score}** |
| π€ **Best Agentic Model** | {agentic_best} |
| π **Best End2End Model** | {end2end_best} |
| π **Total Models** | {len(df)} |
"""
# Load the data once at start-up; the refresh button below reloads it on demand
speechiq_df = load_speechiq_data()

# NOTE(review): several emoji in the UI strings below look mis-encoded ("π",
# "π―", ...). Only the medal legend could be restored with confidence; the
# rest are kept as found and should be restored from the original source.

# Legend shown above the leaderboard table
COLUMN_LEGEND_TEXT = """
### π Column Explanations

- **Rank**: Position ranking with 🥇🥈🥉 medals for top 3 performers
- **Speech IQ**: Overall intelligence quotient combining all dimensions (primary metric)
- **Remember**: Verbatim accuracy score (WER-based)
- **Understand**: Semantic interpretation similarity score
- **Apply**: Downstream task performance score
- **Model Type**: Architecture approach (Agentic vs End2End)
- **Setup**: Specific model configuration and components
- **Audio Encoder**: The audio processing component used

*Higher scores indicate better performance across all metrics.*
"""


def _analysis_markdown(df):
    """Return the Markdown for the Analysis tab computed from leaderboard ``df``.

    Compares rows whose ``Model Type`` contains ``Agentic`` with those
    containing ``End2End`` (count, mean and max ``Speech IQ``) and names the
    ``Setup`` with the highest ``Remember``, ``Understand`` and ``Apply``
    score. An empty frame yields a short "no data" message instead.
    """
    if df.empty:
        return "No data available for analysis."

    # Group by model type for comparison
    agentic_models = df[df['Model Type'].str.contains('Agentic', na=False)]
    end2end_models = df[df['Model Type'].str.contains('End2End', na=False)]

    return f"""
### π Model Type Comparison

**Agentic Models (ASR + LLM):**
- Count: {len(agentic_models)}
- Average Speech IQ: {agentic_models['Speech IQ'].mean():.2f}
- Best Score: {agentic_models['Speech IQ'].max():.2f}

**End-to-End Models:**
- Count: {len(end2end_models)}
- Average Speech IQ: {end2end_models['Speech IQ'].mean():.2f}
- Best Score: {end2end_models['Speech IQ'].max():.2f}

### π― Cognitive Dimension Analysis

**Remember (Verbatim Accuracy):**
- Best performer: {df.loc[df['Remember'].idxmax(), 'Setup']} ({df['Remember'].max():.3f})

**Understand (Semantic Similarity):**
- Best performer: {df.loc[df['Understand'].idxmax(), 'Setup']} ({df['Understand'].max():.3f})

**Apply (Task Performance):**
- Best performer: {df.loc[df['Apply'].idxmax(), 'Setup']} ({df['Apply'].max():.3f})
"""


# Create the Gradio interface
demo = gr.Blocks(css=custom_css, title="SpeechIQ Leaderboard")
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("π SpeechIQ Leaderboard", elem_id="speechiq-leaderboard-tab", id=0):
            # Legend and explanation
            with gr.Row():
                gr.Markdown(COLUMN_LEGEND_TEXT, elem_classes="markdown-text")

            # Main leaderboard table
            with gr.Row():
                leaderboard_table = gr.Dataframe(
                    value=speechiq_df,
                    headers=(
                        speechiq_df.columns.tolist()
                        if not speechiq_df.empty
                        else ['Rank', 'Speech IQ', 'Remember', 'Understand', 'Apply', 'Model Type', 'Setup', 'Audio Encoder']
                    ),
                    interactive=False,
                    elem_classes="leaderboard-table"
                )

            # Statistics section (rendered below the table); the component is
            # kept in a variable so the refresh handler can update it
            with gr.Row():
                stats_markdown = gr.Markdown(
                    get_top_performers(speechiq_df),
                    elem_classes="markdown-text stats-section",
                )

        with gr.TabItem("π Analysis", elem_id="analysis-tab", id=1):
            with gr.Row():
                # Performance comparison text, also updated on refresh
                analysis_markdown = gr.Markdown(
                    _analysis_markdown(speechiq_df),
                    elem_classes="markdown-text",
                )

        with gr.TabItem("π About", elem_id="about-tab", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("π Submit", elem_id="submit-tab", id=3):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    # Citation section
    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=6,
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Add refresh functionality
    with gr.Row():
        refresh_button = gr.Button("π Refresh Data", variant="secondary")

    def refresh_data():
        """Reload the CSV and return the table plus the texts derived from it."""
        updated_df = load_speechiq_data()
        # Statistics and analysis are recomputed from the same reloaded frame
        # so they cannot drift out of sync with the table.
        return (
            updated_df,
            get_top_performers(updated_df),
            _analysis_markdown(updated_df),
        )

    refresh_button.click(
        refresh_data,
        outputs=[leaderboard_table, stats_markdown, analysis_markdown],
    )

if __name__ == "__main__":
    # Listen on all interfaces on port 7860 without creating a public share link
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)