Commit cb51391 · Parent(s): e3793a3

maint: improve text

Files changed:
- README.md (+6 -14)
- constants.py (+2 -2)
- main.py (+60 -26)
README.md CHANGED
@@ -11,20 +11,12 @@ short_description: 'TabArena'
 sdk_version: 4.44.0
 ---

-# TabArena Leaderboard
+# TabArena Leaderboard Code

-This repository contains the frontend code to display TabArena leaderboard.
-HuggingFace space.
+This repository contains the frontend code to display TabArena leaderboard.
+The leaderboard is hosted on a HuggingFace space.

 Reference:
-*
-*
-*
-
-TODOS:
-* add regression/classif/multiclassif
-
-DONE:
-* readme title and information
-* pull data from leaderboard
-* update columns
+* Website: tabarena.ai
+* Paper: TBA
+* TabArena Codebase: https://tabarena.ai/code
constants.py CHANGED
@@ -4,9 +4,9 @@ class Constants:
     foundational: str = "Foundation Model"
     neural_network: str ="Neural Network"
     baseline: str = "Baseline"
+    reference: str ="Reference Pipeline"
     # Not Used
     other: str = "Other"
-    automl: str = "AutoML"

 model_type_emoji = {
     Constants.tree: "🌳",
@@ -15,5 +15,5 @@ model_type_emoji = {
     Constants.baseline: "📏",
     # Not used
     Constants.other: "❓",
-    Constants.
+    Constants.reference:"📊",
 }
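Aside: reassembled, the constants after this commit would look roughly like the sketch below. Values the diff does not show (the `tree` label and the emojis for `foundational` and `neural_network`) are assumptions and marked as such; the last line shows how the mapping might be used to decorate a family label for display.

    # Sketch of constants.py after this commit; marked values are assumptions.
    class Constants:
        tree: str = "Tree-Based"  # assumed label; not visible in the diff
        foundational: str = "Foundation Model"
        neural_network: str = "Neural Network"
        baseline: str = "Baseline"
        reference: str = "Reference Pipeline"
        # Not Used
        other: str = "Other"

    model_type_emoji = {
        Constants.tree: "🌳",
        Constants.foundational: "🧠",    # assumed emoji; hunk context omits it
        Constants.neural_network: "🔮",  # assumed emoji; hunk context omits it
        Constants.baseline: "📏",
        # Not used
        Constants.other: "❓",
        Constants.reference: "📊",
    }

    # Example: decorate a model-family label with its emoji for display.
    print(f"{model_type_emoji[Constants.reference]} {Constants.reference}")  # 📊 Reference Pipeline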
main.py CHANGED
@@ -8,40 +8,73 @@ from apscheduler.schedulers.background import BackgroundScheduler
 from constants import Constants, model_type_emoji
 from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns

-TITLE = """<h1 align="center" id="space-title">TabArena
-
-INTRODUCTION_TEXT = (
-    "TabArena Leaderboard measures the performance of tabular models on a collection of tabular "
-    "datasets manually curated. The datasets are collected to make sure they are tabular, with "
-    "permissive license without ethical issues and so on, we refer to the paper for a full "
-    "description of our approach."
-)
+TITLE = """<h1 align="center" id="space-title">TabArena Leaderboard for Predictive Machine Learning on IID Tabular Data</h1>"""
+
+INTRODUCTION_TEXT = """
+TabArena is a living benchmark system for predictive machine learning on tabular data.
+The goal of TabArena and its leaderboard is to asses the peak performance of
+model-specific pipelines.
+
+**Datasets:** Currently, the leaderboard is based on a manually curated collection of
+51 tabular classification and regression datasets for independent and identically distributed
+(IID) data, spanning the small to medium data regime. The datasets were carefully
+curated to represent various real-world predictive machine learning use cases.
+
+**Models:** The focus of the leaderboard is on model-specific pipelines. Each pipeline
+is evaluated with default or tuned hyperparameter configuration or as an ensemble of
+tuned configurations. Each model is implemented in a tested real-world pipeline that was
+optimized to get the most out of the model by the maintainers of TabArena, and where
+possible together with the authors of the model.
+
+**Reference Pipeline:** The leaderboard includes a reference pipeline, which is applied
+independently of the tuning protocol and constraints we constructed for models within TabArena.
+The reference pipeline aims to represent the performance quickly achievable by a
+practitioner on a dataset. The current reference pipeline is the predictive machine
+learning system AutoGluon (version 1.3, with the best_quality preset and
+4 hours for training). AutoGluon represents an ensemble pipeline across various model
+types and thus provides a reference for model-specific pipelines.
+
+The current leaderboard is based on TabArena-v0.1.
+"""

 ABOUT_TEXT = """
-##
-
-
-
-
-
-
-
-
+## Using TabArena for Benchmarking
+To compare your own methods to the pre-computed results for all models on the leaderboard,
+you can use the TabArena framework. For examples on how to use TabArena for benchmarking,
+please see https://github.com/TabArena/tabarena_benchmarking_examples
+
+## Contributing Data
+For anything related to the datasets used in TabArena, please see https://github.com/TabArena/tabarena_dataset_curation
+
+## Contributing to the Leaderboard
+For guidelines on how to contribute the result of your model to the official leaderboard,
+please see the appendix of our paper. <TODO: publish documentation>
+
+## Contact The Maintainers
+For any inquires related to TabArena, please reach out to: [email protected]
+
+## Core Maintainers
+The current core maintainers of TabArena are:
+[Nick Erickson](https://github.com/Innixma),
+[Lennart Purucker](https://github.com/LennartPurucker/),
+[Andrej Tschalzev](https://github.com/atschalz),
+[David Holzmüller](https://github.com/dholzmueller)
 """

 CITATION_BUTTON_LABEL = (
-    "If you use
+    "If you use TabArena or the leaderboard in your research please cite the following:"
 )
 CITATION_BUTTON_TEXT = r"""
 @article{
 TBA,
 }
 """


 def get_model_family(model_name: str) -> str:
     prefixes_mapping = {
-        Constants.
+        Constants.reference: ["AutoGluon"],
         Constants.neural_network: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
         Constants.tree: ["GBM", "CAT", "EBM", "XGB", "XT", "RF"],
         Constants.foundational: ["TABDPT", "TABICL", "TABPFN"],
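Aside: the hunk above ends inside `prefixes_mapping`, so the matching logic of `get_model_family` is not visible in this commit. A minimal sketch of how such a prefix lookup could work; the `startswith` loop and the fallback are assumptions, not lines from the repo:

    from constants import Constants  # as imported in main.py

    def get_model_family_sketch(model_name: str) -> str:
        # Assumed shape: map each family constant to known method-name prefixes.
        prefixes_mapping = {
            Constants.reference: ["AutoGluon"],
            Constants.neural_network: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
            Constants.tree: ["GBM", "CAT", "EBM", "XGB", "XT", "RF"],
            Constants.foundational: ["TABDPT", "TABICL", "TABPFN"],
        }
        # Return the first family whose prefix matches, case-insensitively.
        for family, prefixes in prefixes_mapping.items():
            if any(model_name.upper().startswith(p.upper()) for p in prefixes):
                return family
        return Constants.other  # fall back to the "Other" bucket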
@@ -88,7 +121,6 @@ def load_data(filename: str):
         f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}"
     )

-
     # add model family information

     df_leaderboard["Type"] = df_leaderboard.loc[:, "method"].apply(
@@ -123,7 +155,9 @@ def load_data(filename: str):
     ]

     # round for better display
-    df_leaderboard[["elo", "Elo 95% CI"]] = df_leaderboard[["elo", "Elo 95% CI"]].round(0)
+    df_leaderboard[["elo", "Elo 95% CI"]] = df_leaderboard[["elo", "Elo 95% CI"]].round(
+        0
+    )
    df_leaderboard[["median_time_train_s_per_1K", "rank"]] = df_leaderboard[
         ["median_time_train_s_per_1K", "rank"]
     ].round(2)
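For illustration, the display rounding in the hunk above applied to a toy frame; the numbers are invented:

    import pandas as pd

    # Toy illustration of the display rounding; values are made up.
    df = pd.DataFrame({
        "elo": [1523.67],
        "Elo 95% CI": [12.34],
        "median_time_train_s_per_1K": [3.14159],
        "rank": [2.456],
    })
    df[["elo", "Elo 95% CI"]] = df[["elo", "Elo 95% CI"]].round(0)  # whole Elo points
    df[["median_time_train_s_per_1K", "rank"]] = df[
        ["median_time_train_s_per_1K", "rank"]
    ].round(2)  # two decimals for times and ranks
    print(df.iloc[0].to_dict())
    # {'elo': 1524.0, 'Elo 95% CI': 12.0, 'median_time_train_s_per_1K': 3.14, 'rank': 2.46}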
@@ -139,7 +173,7 @@ def load_data(filename: str):
     return df_leaderboard.rename(
         columns={
             "median_time_train_s_per_1K": "Median Train Time (s/1K) [⬇️]",
-            "median_time_infer_s_per_1K": "Median Predict Time (s/1K)
+            "median_time_infer_s_per_1K": "Median Predict Time (s/1K) [⬇️]",
             "method": "Model",
             "elo": "Elo [⬆️]",
             "rank": "Rank [⬇️]",
@@ -213,9 +247,9 @@ def make_leaderboard(df_leaderboard: pd.DataFrame) -> Leaderboard:
         label="(Not) Imputed Models.",
         info="We impute the performance for models that cannot run on all"
         " datasets due to task or dataset size constraints (e.g. TabPFN,"
-        " TabICL). We impute with the performance of a
-        " We add a postfix [X% IMPUTED] to the model if any results were"
-        "imputed. The X% shows the percentage of"
+        " TabICL). We impute with the performance of a default RandomForest."
+        " We add a postfix [X% IMPUTED] to the model if any results were"
+        " imputed. The X% shows the percentage of"
         " datasets that were imputed. In general, imputation negatively"
         " represents the model performance, punishing the model for not"
         " being able to run on all datasets.",
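Aside: the info text above describes the "[X% IMPUTED]" postfix; a hypothetical helper illustrating the arithmetic (not code from this commit, and the function name is invented):

    def imputed_postfix(model: str, n_imputed: int, n_datasets: int) -> str:
        """Append '[X% IMPUTED]' when any of a model's results were imputed."""
        if n_imputed == 0:
            return model
        pct = round(100 * n_imputed / n_datasets)
        return f"{model} [{pct}% IMPUTED]"

    # e.g. a model imputed on 13 of the 51 datasets:
    print(imputed_postfix("TabPFN (tuned)", 13, 51))  # -> TabPFN (tuned) [25% IMPUTED]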
@@ -232,7 +266,7 @@ def main():
         gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

         with gr.Tabs(elem_classes="tab-buttons"):
-            with gr.TabItem("🏅
+            with gr.TabItem("🏅 TabArena-v0.1", elem_id="llm-benchmark-tab-table", id=2):
                 df_leaderboard = load_data("tabarena_leaderboard")
                 make_leaderboard(df_leaderboard)

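For orientation, a minimal sketch of how the hunks above fit together in main(); the gr.Blocks container and the launch call are assumptions inferred from the imports and indentation, not lines shown in this commit:

    import gradio as gr

    def main():
        with gr.Blocks() as demo:  # assumed container; not visible in the diff
            gr.Markdown(TITLE)
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
            with gr.Tabs(elem_classes="tab-buttons"):
                with gr.TabItem("🏅 TabArena-v0.1", elem_id="llm-benchmark-tab-table", id=2):
                    df_leaderboard = load_data("tabarena_leaderboard")
                    make_leaderboard(df_leaderboard)
        demo.launch()  # assumed entry point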