Spaces:

ZENLLC
/

LMArenaLeaderboard

Running

App Files Files Community

ZENLLC committed 15 days ago

Commit

4f09bb6

verified ·

1 Parent(s): 54d6710

Create scripts/sync_lmarena.py

Browse files

Files changed (1) hide show

scripts/sync_lmarena.py +81 -0

scripts/sync_lmarena.py ADDED Viewed

	@@ -0,0 +1,81 @@

+# scripts/sync_lmarena.py
+from __future__ import annotations
+import json
+import os
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional, Tuple
+from huggingface_hub import HfApi, hf_hub_download
+# Repo id of the upstream Space that main() lists files from and downloads from.
+UPSTREAM_SPACE = "lmarena-ai/lmarena-leaderboard"  # source Space
+# Local output directory; main() creates it (with parents) if it is missing.
+LOCAL_DIR = Path("data/lmarena")
+# Fixed-name destination for the copy of the newest upstream leaderboard CSV.
+LOCAL_CSV = LOCAL_DIR / "leaderboard_table_latest.csv"
+# JSON record of which upstream file was synced and when.
+LOCAL_META = LOCAL_DIR / "sync_meta.json"
+# Matches leaderboard_table_<8 digits>.csv and captures the digits. The pattern is
+# anchored at both ends, so a name carrying a directory prefix does not match.
+LEADERBOARD_RE = re.compile(r"^leaderboard_table_(\d{8})\.csv$")
+def _pick_latest_leaderboard_file(files: list[str]) -> Optional[Tuple[str, str]]:
+    """
+    Returns (filename, yyyymmdd) for the newest leaderboard_table_YYYYMMDD.csv found.
+    """
+    candidates: list[Tuple[str, str]] = []
+    for f in files:
+        m = LEADERBOARD_RE.match(f)
+        if m:
+            candidates.append((f, m.group(1)))
+    if not candidates:
+        return None
+    # Sort by date string; YYYYMMDD sorts lexicographically correctly
+    candidates.sort(key=lambda x: x[1])
+    return candidates[-1]
+def main() -> int:
+    token = os.getenv("HF_TOKEN")  # optional for public, but recommended for rate limits
+    api = HfApi(token=token)
+    # list files from the upstream *space repo*
+    files = api.list_repo_files(repo_id=UPSTREAM_SPACE, repo_type="space")
+    latest = _pick_latest_leaderboard_file(files)
+    if not latest:
+        raise RuntimeError(
+            f"No leaderboard_table_YYYYMMDD.csv found in upstream Space: {UPSTREAM_SPACE}"
+        )
+    filename, yyyymmdd = latest
+    # Download the raw file to a temp location (hub cache) then copy to our repo path
+    downloaded_path = hf_hub_download(
+        repo_id=UPSTREAM_SPACE,
+        repo_type="space",
+        filename=filename,
+        token=token,
+    )
+    LOCAL_DIR.mkdir(parents=True, exist_ok=True)
+    # Copy file contents to our tracked path
+    Path(downloaded_path).replace(LOCAL_CSV) if False else LOCAL_CSV.write_bytes(Path(downloaded_path).read_bytes())
+    meta = {
+        "source_space": UPSTREAM_SPACE,
+        "source_filename": filename,
+        "source_date": yyyymmdd,
+        "synced_at_utc": datetime.now(timezone.utc).isoformat(),
+    }
+    LOCAL_META.write_text(json.dumps(meta, indent=2), encoding="utf-8")
+    print(f"[OK] Synced {filename} -> {LOCAL_CSV}")
+    print(json.dumps(meta, indent=2))
+    return 0
+# Run the sync only when executed as a script; main()'s return value becomes the exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())