Linker1907 committed on
Commit
1f960f8
·
1 Parent(s): 3142a51

Add SWE-bench Pro and EvasionBench data

Browse files

- Added ScaleAI/SWE-bench_Pro to fetch script (11 models)
- Added FutureMa/EvasionBench to fetch script (5 models)
- Total models increased from 67 to 73
- Re-fetched all data with new benchmarks
- Updated provider logos (33 organizations, 100% coverage)
- Updated LEADERBOARD_DATA in HTML

New benchmark coverage:
- SWE-Pro: 11 models (Qwen3-Coder-Next leads with 44.3%)
- EvasionBench: 5 models (GLM-4.7 leads with 82.91%)

Both benchmarks now fully functional in the leaderboard!

data/leaderboard.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "metadata": {
3
  "version": "1.0.0",
4
- "lastUpdated": "2026-03-16T15:45:23.110813Z",
5
  "title": "Official Benchmarks Leaderboard 2026",
6
  "description": "Unified leaderboard for 12 official Hugging Face benchmarks",
7
- "totalModels": 67,
8
  "totalBenchmarks": 12
9
  },
10
  "benchmarks": {
@@ -1084,69 +1084,6 @@
1084
  "coveragePercent": 58.3,
1085
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png"
1086
  },
1087
- {
1088
- "id": "deepseek-ai-deepseek-v3.2",
1089
- "name": "deepseek-ai/DeepSeek-V3.2",
1090
- "provider": "deepseek-ai",
1091
- "type": "open",
1092
- "released": "2024.01",
1093
- "metadata": {
1094
- "license": "Unknown",
1095
- "parameters": "Unknown",
1096
- "parametersInBillions": 685.4,
1097
- "contextWindow": 0,
1098
- "modality": "text",
1099
- "architecture": "Transformer"
1100
- },
1101
- "benchmarks": {
1102
- "mmluPro": {
1103
- "score": 85.0,
1104
- "confidence": "official",
1105
- "source": "MMLU-Pro API",
1106
- "date": "2026-03-16"
1107
- },
1108
- "gpqa": {
1109
- "score": 82.4,
1110
- "confidence": "official",
1111
- "source": "GPQA Diamond API",
1112
- "date": "2026-03-16"
1113
- },
1114
- "hle": {
1115
- "score": 40.8,
1116
- "confidence": "official",
1117
- "source": "HLE API",
1118
- "date": "2026-03-16"
1119
- },
1120
- "sweVerified": {
1121
- "score": 70.0,
1122
- "confidence": "official",
1123
- "source": "SWE-bench Verified API",
1124
- "date": "2026-03-16"
1125
- },
1126
- "aime2026": {
1127
- "score": 94.17,
1128
- "confidence": "official",
1129
- "source": "AIME 2026 API",
1130
- "date": "2026-03-16"
1131
- },
1132
- "hmmt2026": {
1133
- "score": 84.09,
1134
- "confidence": "official",
1135
- "source": "HMMT Feb 2026 API",
1136
- "date": "2026-03-16"
1137
- },
1138
- "terminalBench": {
1139
- "score": 39.6,
1140
- "confidence": "official",
1141
- "source": "Terminal-Bench 2.0 API",
1142
- "date": "2026-03-16"
1143
- }
1144
- },
1145
- "aggregateScore": 70.87,
1146
- "coverageCount": 7,
1147
- "coveragePercent": 58.3,
1148
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png"
1149
- },
1150
  {
1151
  "id": "zai-org-glm-5",
1152
  "name": "zai-org/GLM-5",
@@ -1291,6 +1228,33 @@
1291
  "coveragePercent": 8.3,
1292
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg"
1293
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1294
  {
1295
  "id": "gair-openswe-72b",
1296
  "name": "GAIR/OpenSWE-72B",
@@ -1351,6 +1315,81 @@
1351
  "coveragePercent": 16.7,
1352
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
1353
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1354
  {
1355
  "id": "qwen-qwen3.5-122b-a10b",
1356
  "name": "Qwen/Qwen3.5-122B-A10B",
@@ -1429,6 +1468,57 @@
1429
  "coveragePercent": 8.3,
1430
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png"
1431
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1432
  {
1433
  "id": "qwen-qwen3.5-27b",
1434
  "name": "Qwen/Qwen3.5-27B",
@@ -1654,51 +1744,6 @@
1654
  "coveragePercent": 33.3,
1655
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
1656
  },
1657
- {
1658
- "id": "zai-org-glm-4.7",
1659
- "name": "zai-org/GLM-4.7",
1660
- "provider": "zai-org",
1661
- "type": "open",
1662
- "released": "2024.01",
1663
- "metadata": {
1664
- "license": "Unknown",
1665
- "parameters": "Unknown",
1666
- "parametersInBillions": 358.3,
1667
- "contextWindow": 0,
1668
- "modality": "text",
1669
- "architecture": "Transformer"
1670
- },
1671
- "benchmarks": {
1672
- "mmluPro": {
1673
- "score": 84.3,
1674
- "confidence": "official",
1675
- "source": "MMLU-Pro API",
1676
- "date": "2026-03-16"
1677
- },
1678
- "gpqa": {
1679
- "score": 85.7,
1680
- "confidence": "official",
1681
- "source": "GPQA Diamond API",
1682
- "date": "2026-03-16"
1683
- },
1684
- "hle": {
1685
- "score": 24.8,
1686
- "confidence": "official",
1687
- "source": "HLE API",
1688
- "date": "2026-03-16"
1689
- },
1690
- "terminalBench": {
1691
- "score": 33.4,
1692
- "confidence": "official",
1693
- "source": "Terminal-Bench 2.0 API",
1694
- "date": "2026-03-16"
1695
- }
1696
- },
1697
- "aggregateScore": 57.05,
1698
- "coverageCount": 4,
1699
- "coveragePercent": 33.3,
1700
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png"
1701
- },
1702
  {
1703
  "id": "qwen-qwen3.5-2b",
1704
  "name": "Qwen/Qwen3.5-2B",
@@ -1850,109 +1895,199 @@
1850
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png"
1851
  },
1852
  {
1853
- "id": "nanbeige-nanbeige4.1-3b",
1854
- "name": "Nanbeige/Nanbeige4.1-3B",
1855
- "provider": "Nanbeige",
1856
  "type": "open",
1857
  "released": "2024.01",
1858
  "metadata": {
1859
  "license": "Unknown",
1860
  "parameters": "Unknown",
1861
- "parametersInBillions": 3.9,
1862
  "contextWindow": 0,
1863
  "modality": "text",
1864
  "architecture": "Transformer"
1865
  },
1866
  "benchmarks": {
1867
- "gpqa": {
1868
- "score": 83.8,
1869
  "confidence": "official",
1870
- "source": "GPQA Diamond API",
1871
  "date": "2026-03-16"
1872
  },
1873
- "hle": {
1874
- "score": 12.6,
1875
  "confidence": "official",
1876
- "source": "HLE API",
1877
  "date": "2026-03-16"
1878
- }
 
 
 
 
 
 
1879
  },
1880
- "aggregateScore": 48.2,
1881
- "coverageCount": 2,
1882
- "coveragePercent": 16.7,
1883
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png"
1884
  },
1885
  {
1886
- "id": "nvidia-nvidia-nemotron-3-nano-30b-a3b-bf16",
1887
- "name": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
1888
- "provider": "nvidia",
1889
  "type": "open",
1890
  "released": "2024.01",
1891
  "metadata": {
1892
  "license": "Unknown",
1893
  "parameters": "Unknown",
1894
- "parametersInBillions": 31.6,
1895
  "contextWindow": 0,
1896
  "modality": "text",
1897
  "architecture": "Transformer"
1898
  },
1899
  "benchmarks": {
1900
  "mmluPro": {
1901
- "score": 78.3,
1902
  "confidence": "official",
1903
  "source": "MMLU-Pro API",
1904
  "date": "2026-03-16"
1905
  },
1906
  "hle": {
1907
- "score": 15.5,
1908
  "confidence": "official",
1909
  "source": "HLE API",
1910
  "date": "2026-03-16"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1911
  }
1912
  },
1913
- "aggregateScore": 46.9,
1914
- "coverageCount": 2,
1915
- "coveragePercent": 16.7,
1916
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png"
1917
  },
1918
  {
1919
- "id": "minimaxai-minimax-m2.1",
1920
- "name": "MiniMaxAI/MiniMax-M2.1",
1921
- "provider": "MiniMaxAI",
1922
  "type": "open",
1923
  "released": "2024.01",
1924
  "metadata": {
1925
  "license": "Unknown",
1926
  "parameters": "Unknown",
1927
- "parametersInBillions": 228.7,
1928
  "contextWindow": 0,
1929
  "modality": "text",
1930
  "architecture": "Transformer"
1931
  },
1932
  "benchmarks": {
1933
- "mmluPro": {
1934
- "score": 88.0,
1935
  "confidence": "official",
1936
- "source": "MMLU-Pro API",
1937
  "date": "2026-03-16"
1938
  },
1939
  "hle": {
1940
- "score": 22.2,
1941
  "confidence": "official",
1942
  "source": "HLE API",
1943
  "date": "2026-03-16"
1944
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1945
  "terminalBench": {
1946
- "score": 29.2,
1947
  "confidence": "official",
1948
  "source": "Terminal-Bench 2.0 API",
1949
  "date": "2026-03-16"
 
 
 
 
 
 
 
 
 
 
 
 
1950
  }
1951
  },
1952
- "aggregateScore": 46.47,
1953
  "coverageCount": 3,
1954
  "coveragePercent": 25.0,
1955
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1956
  },
1957
  {
1958
  "id": "zai-org-glm-4.7-flash",
@@ -2126,97 +2261,103 @@
2126
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg"
2127
  },
2128
  {
2129
- "id": "openai-gpt-oss-120b",
2130
- "name": "openai/gpt-oss-120b",
2131
- "provider": "openai",
2132
  "type": "open",
2133
  "released": "2024.01",
2134
  "metadata": {
2135
  "license": "Unknown",
2136
  "parameters": "Unknown",
2137
- "parametersInBillions": 120.4,
2138
  "contextWindow": 0,
2139
  "modality": "text",
2140
  "architecture": "Transformer"
2141
  },
2142
  "benchmarks": {
2143
- "gpqa": {
2144
- "score": 67.1,
2145
- "confidence": "official",
2146
- "source": "GPQA Diamond API",
2147
- "date": "2026-03-16"
2148
- },
2149
  "hle": {
2150
- "score": 5.2,
2151
  "confidence": "official",
2152
  "source": "HLE API",
2153
  "date": "2026-03-16"
2154
- },
2155
- "sweVerified": {
2156
- "score": 47.9,
2157
- "confidence": "official",
2158
- "source": "SWE-bench Verified API",
2159
- "date": "2026-03-16"
2160
  }
2161
  },
2162
- "aggregateScore": 40.07,
2163
- "coverageCount": 3,
2164
- "coveragePercent": 25.0,
2165
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png"
2166
  },
2167
  {
2168
- "id": "miromind-ai-mirothinker-v1.5-235b",
2169
- "name": "miromind-ai/MiroThinker-v1.5-235B",
2170
- "provider": "miromind-ai",
2171
  "type": "open",
2172
  "released": "2024.01",
2173
  "metadata": {
2174
  "license": "Unknown",
2175
  "parameters": "Unknown",
2176
- "parametersInBillions": 235.0,
2177
  "contextWindow": 0,
2178
  "modality": "text",
2179
  "architecture": "Transformer"
2180
  },
2181
  "benchmarks": {
2182
  "hle": {
2183
- "score": 39.2,
2184
  "confidence": "official",
2185
  "source": "HLE API",
2186
  "date": "2026-03-16"
2187
  }
2188
  },
2189
- "aggregateScore": 39.2,
2190
  "coverageCount": 1,
2191
  "coveragePercent": 8.3,
2192
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png"
2193
  },
2194
  {
2195
- "id": "nvidia-nemotron-orchestrator-8b",
2196
- "name": "nvidia/Nemotron-Orchestrator-8B",
2197
- "provider": "nvidia",
2198
  "type": "open",
2199
  "released": "2024.01",
2200
  "metadata": {
2201
  "license": "Unknown",
2202
  "parameters": "Unknown",
2203
- "parametersInBillions": 8.0,
2204
  "contextWindow": 0,
2205
  "modality": "text",
2206
  "architecture": "Transformer"
2207
  },
2208
  "benchmarks": {
 
 
 
 
 
 
2209
  "hle": {
2210
- "score": 37.1,
2211
  "confidence": "official",
2212
  "source": "HLE API",
2213
  "date": "2026-03-16"
 
 
 
 
 
 
 
 
 
 
 
 
2214
  }
2215
  },
2216
- "aggregateScore": 37.1,
2217
- "coverageCount": 1,
2218
- "coveragePercent": 8.3,
2219
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png"
2220
  },
2221
  {
2222
  "id": "openai-gpt-oss-20b",
@@ -2304,94 +2445,73 @@
2304
  "confidence": "official",
2305
  "source": "Terminal-Bench 2.0 API",
2306
  "date": "2026-03-16"
 
 
 
 
 
 
2307
  }
2308
  },
2309
- "aggregateScore": 27.8,
2310
- "coverageCount": 1,
2311
- "coveragePercent": 8.3,
2312
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
2313
  },
2314
  {
2315
- "id": "zai-org-glm-4.6",
2316
- "name": "zai-org/GLM-4.6",
2317
- "provider": "zai-org",
2318
  "type": "open",
2319
  "released": "2024.01",
2320
  "metadata": {
2321
  "license": "Unknown",
2322
  "parameters": "Unknown",
2323
- "parametersInBillions": 356.8,
2324
  "contextWindow": 0,
2325
  "modality": "text",
2326
  "architecture": "Transformer"
2327
  },
2328
  "benchmarks": {
2329
- "terminalBench": {
2330
- "score": 24.5,
2331
  "confidence": "official",
2332
- "source": "Terminal-Bench 2.0 API",
2333
  "date": "2026-03-16"
2334
  }
2335
  },
2336
- "aggregateScore": 24.5,
2337
  "coverageCount": 1,
2338
  "coveragePercent": 8.3,
2339
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png"
2340
  },
2341
  {
2342
- "id": "qwen-qwen3-coder-480b-a35b-instruct",
2343
- "name": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
2344
  "provider": "Qwen",
2345
  "type": "open",
2346
  "released": "2024.01",
2347
  "metadata": {
2348
  "license": "Unknown",
2349
  "parameters": "Unknown",
2350
- "parametersInBillions": 480.2,
2351
  "contextWindow": 0,
2352
  "modality": "text",
2353
  "architecture": "Transformer"
2354
  },
2355
  "benchmarks": {
2356
- "terminalBench": {
2357
- "score": 23.9,
2358
  "confidence": "official",
2359
- "source": "Terminal-Bench 2.0 API",
2360
  "date": "2026-03-16"
2361
  }
2362
  },
2363
- "aggregateScore": 23.9,
2364
  "coverageCount": 1,
2365
  "coveragePercent": 8.3,
2366
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
2367
  },
2368
- {
2369
- "id": "xiaomimimo-mimo-v2-flash",
2370
- "name": "XiaomiMiMo/MiMo-V2-Flash",
2371
- "provider": "XiaomiMiMo",
2372
- "type": "open",
2373
- "released": "2024.01",
2374
- "metadata": {
2375
- "license": "Unknown",
2376
- "parameters": "Unknown",
2377
- "parametersInBillions": 309.8,
2378
- "contextWindow": 0,
2379
- "modality": "text",
2380
- "architecture": "Transformer"
2381
- },
2382
- "benchmarks": {
2383
- "hle": {
2384
- "score": 22.1,
2385
- "confidence": "official",
2386
- "source": "HLE API",
2387
- "date": "2026-03-16"
2388
- }
2389
- },
2390
- "aggregateScore": 22.1,
2391
- "coverageCount": 1,
2392
- "coveragePercent": 8.3,
2393
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg"
2394
- },
2395
  {
2396
  "id": "qwen-qwen3.5-0.8b",
2397
  "name": "Qwen/Qwen3.5-0.8B",
@@ -2452,6 +2572,93 @@
2452
  "coveragePercent": 8.3,
2453
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png"
2454
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2455
  {
2456
  "id": "helpingai-dhanishtha-2.0-0126",
2457
  "name": "HelpingAI/Dhanishtha-2.0-0126",
@@ -2478,6 +2685,33 @@
2478
  "coverageCount": 1,
2479
  "coveragePercent": 8.3,
2480
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2481
  }
2482
  ]
2483
  }
 
1
  {
2
  "metadata": {
3
  "version": "1.0.0",
4
+ "lastUpdated": "2026-03-16T16:20:45.100745Z",
5
  "title": "Official Benchmarks Leaderboard 2026",
6
  "description": "Unified leaderboard for 12 official Hugging Face benchmarks",
7
+ "totalModels": 73,
8
  "totalBenchmarks": 12
9
  },
10
  "benchmarks": {
 
1084
  "coveragePercent": 58.3,
1085
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png"
1086
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1087
  {
1088
  "id": "zai-org-glm-5",
1089
  "name": "zai-org/GLM-5",
 
1228
  "coveragePercent": 8.3,
1229
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg"
1230
  },
1231
+ {
1232
+ "id": "moonshotai-kimi-k2-instruct-0905",
1233
+ "name": "moonshotai/Kimi-K2-Instruct-0905",
1234
+ "provider": "moonshotai",
1235
+ "type": "open",
1236
+ "released": "2024.01",
1237
+ "metadata": {
1238
+ "license": "Unknown",
1239
+ "parameters": "Unknown",
1240
+ "parametersInBillions": 1026.5,
1241
+ "contextWindow": 0,
1242
+ "modality": "text",
1243
+ "architecture": "Transformer"
1244
+ },
1245
+ "benchmarks": {
1246
+ "evasionBench": {
1247
+ "score": 66.68,
1248
+ "confidence": "official",
1249
+ "source": "EvasionBench API",
1250
+ "date": "2026-03-16"
1251
+ }
1252
+ },
1253
+ "aggregateScore": 66.68,
1254
+ "coverageCount": 1,
1255
+ "coveragePercent": 8.3,
1256
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
1257
+ },
1258
  {
1259
  "id": "gair-openswe-72b",
1260
  "name": "GAIR/OpenSWE-72B",
 
1315
  "coveragePercent": 16.7,
1316
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
1317
  },
1318
+ {
1319
+ "id": "deepseek-ai-deepseek-v3.2",
1320
+ "name": "deepseek-ai/DeepSeek-V3.2",
1321
+ "provider": "deepseek-ai",
1322
+ "type": "open",
1323
+ "released": "2024.01",
1324
+ "metadata": {
1325
+ "license": "Unknown",
1326
+ "parameters": "Unknown",
1327
+ "parametersInBillions": 685.4,
1328
+ "contextWindow": 0,
1329
+ "modality": "text",
1330
+ "architecture": "Transformer"
1331
+ },
1332
+ "benchmarks": {
1333
+ "mmluPro": {
1334
+ "score": 85.0,
1335
+ "confidence": "official",
1336
+ "source": "MMLU-Pro API",
1337
+ "date": "2026-03-16"
1338
+ },
1339
+ "gpqa": {
1340
+ "score": 82.4,
1341
+ "confidence": "official",
1342
+ "source": "GPQA Diamond API",
1343
+ "date": "2026-03-16"
1344
+ },
1345
+ "hle": {
1346
+ "score": 40.8,
1347
+ "confidence": "official",
1348
+ "source": "HLE API",
1349
+ "date": "2026-03-16"
1350
+ },
1351
+ "sweVerified": {
1352
+ "score": 70.0,
1353
+ "confidence": "official",
1354
+ "source": "SWE-bench Verified API",
1355
+ "date": "2026-03-16"
1356
+ },
1357
+ "aime2026": {
1358
+ "score": 94.17,
1359
+ "confidence": "official",
1360
+ "source": "AIME 2026 API",
1361
+ "date": "2026-03-16"
1362
+ },
1363
+ "hmmt2026": {
1364
+ "score": 84.09,
1365
+ "confidence": "official",
1366
+ "source": "HMMT Feb 2026 API",
1367
+ "date": "2026-03-16"
1368
+ },
1369
+ "terminalBench": {
1370
+ "score": 39.6,
1371
+ "confidence": "official",
1372
+ "source": "Terminal-Bench 2.0 API",
1373
+ "date": "2026-03-16"
1374
+ },
1375
+ "swePro": {
1376
+ "score": 15.56,
1377
+ "confidence": "official",
1378
+ "source": "SWE-bench Pro API",
1379
+ "date": "2026-03-16"
1380
+ },
1381
+ "evasionBench": {
1382
+ "score": 66.88,
1383
+ "confidence": "official",
1384
+ "source": "EvasionBench API",
1385
+ "date": "2026-03-16"
1386
+ }
1387
+ },
1388
+ "aggregateScore": 64.28,
1389
+ "coverageCount": 9,
1390
+ "coveragePercent": 75.0,
1391
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png"
1392
+ },
1393
  {
1394
  "id": "qwen-qwen3.5-122b-a10b",
1395
  "name": "Qwen/Qwen3.5-122B-A10B",
 
1468
  "coveragePercent": 8.3,
1469
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png"
1470
  },
1471
+ {
1472
+ "id": "zai-org-glm-4.7",
1473
+ "name": "zai-org/GLM-4.7",
1474
+ "provider": "zai-org",
1475
+ "type": "open",
1476
+ "released": "2024.01",
1477
+ "metadata": {
1478
+ "license": "Unknown",
1479
+ "parameters": "Unknown",
1480
+ "parametersInBillions": 358.3,
1481
+ "contextWindow": 0,
1482
+ "modality": "text",
1483
+ "architecture": "Transformer"
1484
+ },
1485
+ "benchmarks": {
1486
+ "mmluPro": {
1487
+ "score": 84.3,
1488
+ "confidence": "official",
1489
+ "source": "MMLU-Pro API",
1490
+ "date": "2026-03-16"
1491
+ },
1492
+ "gpqa": {
1493
+ "score": 85.7,
1494
+ "confidence": "official",
1495
+ "source": "GPQA Diamond API",
1496
+ "date": "2026-03-16"
1497
+ },
1498
+ "hle": {
1499
+ "score": 24.8,
1500
+ "confidence": "official",
1501
+ "source": "HLE API",
1502
+ "date": "2026-03-16"
1503
+ },
1504
+ "terminalBench": {
1505
+ "score": 33.4,
1506
+ "confidence": "official",
1507
+ "source": "Terminal-Bench 2.0 API",
1508
+ "date": "2026-03-16"
1509
+ },
1510
+ "evasionBench": {
1511
+ "score": 82.91,
1512
+ "confidence": "official",
1513
+ "source": "EvasionBench API",
1514
+ "date": "2026-03-16"
1515
+ }
1516
+ },
1517
+ "aggregateScore": 62.22,
1518
+ "coverageCount": 5,
1519
+ "coveragePercent": 41.7,
1520
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png"
1521
+ },
1522
  {
1523
  "id": "qwen-qwen3.5-27b",
1524
  "name": "Qwen/Qwen3.5-27B",
 
1744
  "coveragePercent": 33.3,
1745
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
1746
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1747
  {
1748
  "id": "qwen-qwen3.5-2b",
1749
  "name": "Qwen/Qwen3.5-2B",
 
1895
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png"
1896
  },
1897
  {
1898
+ "id": "qwen-qwen3-coder-next",
1899
+ "name": "Qwen/Qwen3-Coder-Next",
1900
+ "provider": "Qwen",
1901
  "type": "open",
1902
  "released": "2024.01",
1903
  "metadata": {
1904
  "license": "Unknown",
1905
  "parameters": "Unknown",
1906
+ "parametersInBillions": 79.7,
1907
  "contextWindow": 0,
1908
  "modality": "text",
1909
  "architecture": "Transformer"
1910
  },
1911
  "benchmarks": {
1912
+ "sweVerified": {
1913
+ "score": 70.6,
1914
  "confidence": "official",
1915
+ "source": "SWE-bench Verified API",
1916
  "date": "2026-03-16"
1917
  },
1918
+ "terminalBench": {
1919
+ "score": 36.2,
1920
  "confidence": "official",
1921
+ "source": "Terminal-Bench 2.0 API",
1922
  "date": "2026-03-16"
1923
+ },
1924
+ "swePro": {
1925
+ "score": 44.3,
1926
+ "confidence": "official",
1927
+ "source": "SWE-bench Pro API",
1928
+ "date": "2026-03-16"
1929
+ }
1930
  },
1931
+ "aggregateScore": 50.37,
1932
+ "coverageCount": 3,
1933
+ "coveragePercent": 25.0,
1934
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
1935
  },
1936
  {
1937
+ "id": "minimaxai-minimax-m2.1",
1938
+ "name": "MiniMaxAI/MiniMax-M2.1",
1939
+ "provider": "MiniMaxAI",
1940
  "type": "open",
1941
  "released": "2024.01",
1942
  "metadata": {
1943
  "license": "Unknown",
1944
  "parameters": "Unknown",
1945
+ "parametersInBillions": 228.7,
1946
  "contextWindow": 0,
1947
  "modality": "text",
1948
  "architecture": "Transformer"
1949
  },
1950
  "benchmarks": {
1951
  "mmluPro": {
1952
+ "score": 88.0,
1953
  "confidence": "official",
1954
  "source": "MMLU-Pro API",
1955
  "date": "2026-03-16"
1956
  },
1957
  "hle": {
1958
+ "score": 22.2,
1959
  "confidence": "official",
1960
  "source": "HLE API",
1961
  "date": "2026-03-16"
1962
+ },
1963
+ "terminalBench": {
1964
+ "score": 29.2,
1965
+ "confidence": "official",
1966
+ "source": "Terminal-Bench 2.0 API",
1967
+ "date": "2026-03-16"
1968
+ },
1969
+ "swePro": {
1970
+ "score": 36.81,
1971
+ "confidence": "official",
1972
+ "source": "SWE-bench Pro API",
1973
+ "date": "2026-03-16"
1974
+ },
1975
+ "evasionBench": {
1976
+ "score": 71.31,
1977
+ "confidence": "official",
1978
+ "source": "EvasionBench API",
1979
+ "date": "2026-03-16"
1980
  }
1981
  },
1982
+ "aggregateScore": 49.5,
1983
+ "coverageCount": 5,
1984
+ "coveragePercent": 41.7,
1985
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg"
1986
  },
1987
  {
1988
+ "id": "nanbeige-nanbeige4.1-3b",
1989
+ "name": "Nanbeige/Nanbeige4.1-3B",
1990
+ "provider": "Nanbeige",
1991
  "type": "open",
1992
  "released": "2024.01",
1993
  "metadata": {
1994
  "license": "Unknown",
1995
  "parameters": "Unknown",
1996
+ "parametersInBillions": 3.9,
1997
  "contextWindow": 0,
1998
  "modality": "text",
1999
  "architecture": "Transformer"
2000
  },
2001
  "benchmarks": {
2002
+ "gpqa": {
2003
+ "score": 83.8,
2004
  "confidence": "official",
2005
+ "source": "GPQA Diamond API",
2006
  "date": "2026-03-16"
2007
  },
2008
  "hle": {
2009
+ "score": 12.6,
2010
  "confidence": "official",
2011
  "source": "HLE API",
2012
  "date": "2026-03-16"
2013
+ }
2014
+ },
2015
+ "aggregateScore": 48.2,
2016
+ "coverageCount": 2,
2017
+ "coveragePercent": 16.7,
2018
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png"
2019
+ },
2020
+ {
2021
+ "id": "qwen-qwen3-coder-480b-a35b-instruct",
2022
+ "name": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
2023
+ "provider": "Qwen",
2024
+ "type": "open",
2025
+ "released": "2024.01",
2026
+ "metadata": {
2027
+ "license": "Unknown",
2028
+ "parameters": "Unknown",
2029
+ "parametersInBillions": 480.2,
2030
+ "contextWindow": 0,
2031
+ "modality": "text",
2032
+ "architecture": "Transformer"
2033
+ },
2034
+ "benchmarks": {
2035
  "terminalBench": {
2036
+ "score": 23.9,
2037
  "confidence": "official",
2038
  "source": "Terminal-Bench 2.0 API",
2039
  "date": "2026-03-16"
2040
+ },
2041
+ "swePro": {
2042
+ "score": 38.7,
2043
+ "confidence": "official",
2044
+ "source": "SWE-bench Pro API",
2045
+ "date": "2026-03-16"
2046
+ },
2047
+ "evasionBench": {
2048
+ "score": 78.16,
2049
+ "confidence": "official",
2050
+ "source": "EvasionBench API",
2051
+ "date": "2026-03-16"
2052
  }
2053
  },
2054
+ "aggregateScore": 46.92,
2055
  "coverageCount": 3,
2056
  "coveragePercent": 25.0,
2057
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
2058
+ },
2059
+ {
2060
+ "id": "nvidia-nvidia-nemotron-3-nano-30b-a3b-bf16",
2061
+ "name": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
2062
+ "provider": "nvidia",
2063
+ "type": "open",
2064
+ "released": "2024.01",
2065
+ "metadata": {
2066
+ "license": "Unknown",
2067
+ "parameters": "Unknown",
2068
+ "parametersInBillions": 31.6,
2069
+ "contextWindow": 0,
2070
+ "modality": "text",
2071
+ "architecture": "Transformer"
2072
+ },
2073
+ "benchmarks": {
2074
+ "mmluPro": {
2075
+ "score": 78.3,
2076
+ "confidence": "official",
2077
+ "source": "MMLU-Pro API",
2078
+ "date": "2026-03-16"
2079
+ },
2080
+ "hle": {
2081
+ "score": 15.5,
2082
+ "confidence": "official",
2083
+ "source": "HLE API",
2084
+ "date": "2026-03-16"
2085
+ }
2086
+ },
2087
+ "aggregateScore": 46.9,
2088
+ "coverageCount": 2,
2089
+ "coveragePercent": 16.7,
2090
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png"
2091
  },
2092
  {
2093
  "id": "zai-org-glm-4.7-flash",
 
2261
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg"
2262
  },
2263
  {
2264
+ "id": "miromind-ai-mirothinker-v1.5-235b",
2265
+ "name": "miromind-ai/MiroThinker-v1.5-235B",
2266
+ "provider": "miromind-ai",
2267
  "type": "open",
2268
  "released": "2024.01",
2269
  "metadata": {
2270
  "license": "Unknown",
2271
  "parameters": "Unknown",
2272
+ "parametersInBillions": 235.0,
2273
  "contextWindow": 0,
2274
  "modality": "text",
2275
  "architecture": "Transformer"
2276
  },
2277
  "benchmarks": {
 
 
 
 
 
 
2278
  "hle": {
2279
+ "score": 39.2,
2280
  "confidence": "official",
2281
  "source": "HLE API",
2282
  "date": "2026-03-16"
 
 
 
 
 
 
2283
  }
2284
  },
2285
+ "aggregateScore": 39.2,
2286
+ "coverageCount": 1,
2287
+ "coveragePercent": 8.3,
2288
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png"
2289
  },
2290
  {
2291
+ "id": "nvidia-nemotron-orchestrator-8b",
2292
+ "name": "nvidia/Nemotron-Orchestrator-8B",
2293
+ "provider": "nvidia",
2294
  "type": "open",
2295
  "released": "2024.01",
2296
  "metadata": {
2297
  "license": "Unknown",
2298
  "parameters": "Unknown",
2299
+ "parametersInBillions": 8.0,
2300
  "contextWindow": 0,
2301
  "modality": "text",
2302
  "architecture": "Transformer"
2303
  },
2304
  "benchmarks": {
2305
  "hle": {
2306
+ "score": 37.1,
2307
  "confidence": "official",
2308
  "source": "HLE API",
2309
  "date": "2026-03-16"
2310
  }
2311
  },
2312
+ "aggregateScore": 37.1,
2313
  "coverageCount": 1,
2314
  "coveragePercent": 8.3,
2315
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png"
2316
  },
2317
  {
2318
+ "id": "openai-gpt-oss-120b",
2319
+ "name": "openai/gpt-oss-120b",
2320
+ "provider": "openai",
2321
  "type": "open",
2322
  "released": "2024.01",
2323
  "metadata": {
2324
  "license": "Unknown",
2325
  "parameters": "Unknown",
2326
+ "parametersInBillions": 120.4,
2327
  "contextWindow": 0,
2328
  "modality": "text",
2329
  "architecture": "Transformer"
2330
  },
2331
  "benchmarks": {
2332
+ "gpqa": {
2333
+ "score": 67.1,
2334
+ "confidence": "official",
2335
+ "source": "GPQA Diamond API",
2336
+ "date": "2026-03-16"
2337
+ },
2338
  "hle": {
2339
+ "score": 5.2,
2340
  "confidence": "official",
2341
  "source": "HLE API",
2342
  "date": "2026-03-16"
2343
+ },
2344
+ "sweVerified": {
2345
+ "score": 47.9,
2346
+ "confidence": "official",
2347
+ "source": "SWE-bench Verified API",
2348
+ "date": "2026-03-16"
2349
+ },
2350
+ "swePro": {
2351
+ "score": 16.2,
2352
+ "confidence": "official",
2353
+ "source": "SWE-bench Pro API",
2354
+ "date": "2026-03-16"
2355
  }
2356
  },
2357
+ "aggregateScore": 34.1,
2358
+ "coverageCount": 4,
2359
+ "coveragePercent": 33.3,
2360
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png"
2361
  },
2362
  {
2363
  "id": "openai-gpt-oss-20b",
 
2445
  "confidence": "official",
2446
  "source": "Terminal-Bench 2.0 API",
2447
  "date": "2026-03-16"
2448
+ },
2449
+ "swePro": {
2450
+ "score": 27.67,
2451
+ "confidence": "official",
2452
+ "source": "SWE-bench Pro API",
2453
+ "date": "2026-03-16"
2454
  }
2455
  },
2456
+ "aggregateScore": 27.73,
2457
+ "coverageCount": 2,
2458
+ "coveragePercent": 16.7,
2459
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
2460
  },
2461
  {
2462
+ "id": "xiaomimimo-mimo-v2-flash",
2463
+ "name": "XiaomiMiMo/MiMo-V2-Flash",
2464
+ "provider": "XiaomiMiMo",
2465
  "type": "open",
2466
  "released": "2024.01",
2467
  "metadata": {
2468
  "license": "Unknown",
2469
  "parameters": "Unknown",
2470
+ "parametersInBillions": 309.8,
2471
  "contextWindow": 0,
2472
  "modality": "text",
2473
  "architecture": "Transformer"
2474
  },
2475
  "benchmarks": {
2476
+ "hle": {
2477
+ "score": 22.1,
2478
  "confidence": "official",
2479
+ "source": "HLE API",
2480
  "date": "2026-03-16"
2481
  }
2482
  },
2483
+ "aggregateScore": 22.1,
2484
  "coverageCount": 1,
2485
  "coveragePercent": 8.3,
2486
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg"
2487
  },
2488
  {
2489
+ "id": "qwen-qwen3-235b-a22b",
2490
+ "name": "Qwen/Qwen3-235B-A22B",
2491
  "provider": "Qwen",
2492
  "type": "open",
2493
  "released": "2024.01",
2494
  "metadata": {
2495
  "license": "Unknown",
2496
  "parameters": "Unknown",
2497
+ "parametersInBillions": 235.1,
2498
  "contextWindow": 0,
2499
  "modality": "text",
2500
  "architecture": "Transformer"
2501
  },
2502
  "benchmarks": {
2503
+ "swePro": {
2504
+ "score": 21.41,
2505
  "confidence": "official",
2506
+ "source": "SWE-bench Pro API",
2507
  "date": "2026-03-16"
2508
  }
2509
  },
2510
+ "aggregateScore": 21.41,
2511
  "coverageCount": 1,
2512
  "coveragePercent": 8.3,
2513
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
2514
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2515
  {
2516
  "id": "qwen-qwen3.5-0.8b",
2517
  "name": "Qwen/Qwen3.5-0.8B",
 
2572
  "coveragePercent": 8.3,
2573
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png"
2574
  },
2575
+ {
2576
+ "id": "zai-org-glm-4.6",
2577
+ "name": "zai-org/GLM-4.6",
2578
+ "provider": "zai-org",
2579
+ "type": "open",
2580
+ "released": "2024.01",
2581
+ "metadata": {
2582
+ "license": "Unknown",
2583
+ "parameters": "Unknown",
2584
+ "parametersInBillions": 356.8,
2585
+ "contextWindow": 0,
2586
+ "modality": "text",
2587
+ "architecture": "Transformer"
2588
+ },
2589
+ "benchmarks": {
2590
+ "terminalBench": {
2591
+ "score": 24.5,
2592
+ "confidence": "official",
2593
+ "source": "Terminal-Bench 2.0 API",
2594
+ "date": "2026-03-16"
2595
+ },
2596
+ "swePro": {
2597
+ "score": 9.67,
2598
+ "confidence": "official",
2599
+ "source": "SWE-bench Pro API",
2600
+ "date": "2026-03-16"
2601
+ }
2602
+ },
2603
+ "aggregateScore": 17.09,
2604
+ "coverageCount": 2,
2605
+ "coveragePercent": 16.7,
2606
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png"
2607
+ },
2608
+ {
2609
+ "id": "google-gemma-3-27b-it",
2610
+ "name": "google/gemma-3-27b-it",
2611
+ "provider": "google",
2612
+ "type": "open",
2613
+ "released": "2024.01",
2614
+ "metadata": {
2615
+ "license": "Unknown",
2616
+ "parameters": "Unknown",
2617
+ "parametersInBillions": 27.4,
2618
+ "contextWindow": 0,
2619
+ "modality": "text",
2620
+ "architecture": "Transformer"
2621
+ },
2622
+ "benchmarks": {
2623
+ "swePro": {
2624
+ "score": 11.38,
2625
+ "confidence": "official",
2626
+ "source": "SWE-bench Pro API",
2627
+ "date": "2026-03-16"
2628
+ }
2629
+ },
2630
+ "aggregateScore": 11.38,
2631
+ "coverageCount": 1,
2632
+ "coveragePercent": 8.3,
2633
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png"
2634
+ },
2635
+ {
2636
+ "id": "meta-llama-llama-3.1-405b-instruct",
2637
+ "name": "meta-llama/Llama-3.1-405B-Instruct",
2638
+ "provider": "meta-llama",
2639
+ "type": "open",
2640
+ "released": "2024.01",
2641
+ "metadata": {
2642
+ "license": "Unknown",
2643
+ "parameters": "Unknown",
2644
+ "parametersInBillions": 405.9,
2645
+ "contextWindow": 0,
2646
+ "modality": "text",
2647
+ "architecture": "Transformer"
2648
+ },
2649
+ "benchmarks": {
2650
+ "swePro": {
2651
+ "score": 11.18,
2652
+ "confidence": "official",
2653
+ "source": "SWE-bench Pro API",
2654
+ "date": "2026-03-16"
2655
+ }
2656
+ },
2657
+ "aggregateScore": 11.18,
2658
+ "coverageCount": 1,
2659
+ "coveragePercent": 8.3,
2660
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png"
2661
+ },
2662
  {
2663
  "id": "helpingai-dhanishtha-2.0-0126",
2664
  "name": "HelpingAI/Dhanishtha-2.0-0126",
 
2685
  "coverageCount": 1,
2686
  "coveragePercent": 8.3,
2687
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png"
2688
+ },
2689
+ {
2690
+ "id": "meta-llama-llama-4-maverick-17b-128e-instruct",
2691
+ "name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
2692
+ "provider": "meta-llama",
2693
+ "type": "open",
2694
+ "released": "2024.01",
2695
+ "metadata": {
2696
+ "license": "Unknown",
2697
+ "parameters": "Unknown",
2698
+ "parametersInBillions": 401.6,
2699
+ "contextWindow": 0,
2700
+ "modality": "text",
2701
+ "architecture": "Transformer"
2702
+ },
2703
+ "benchmarks": {
2704
+ "swePro": {
2705
+ "score": 5.24,
2706
+ "confidence": "official",
2707
+ "source": "SWE-bench Pro API",
2708
+ "date": "2026-03-16"
2709
+ }
2710
+ },
2711
+ "aggregateScore": 5.24,
2712
+ "coverageCount": 1,
2713
+ "coveragePercent": 8.3,
2714
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png"
2715
  }
2716
  ]
2717
  }
data/provider_logos.json CHANGED
@@ -13,6 +13,7 @@
13
  "arcee-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6435718aaaef013d1aec3b8b/GZPnGkfMn8Ino6JbkL4fJ.png",
14
  "datalab-to": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ab6afe315e622f597bf9e8/YOgg0gVYVXZC1PDIHFTWK.png",
15
  "deepseek-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png",
 
16
  "infly": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ed9862679c2cc40abb55d2/0n6g0jngiKkRjaEoAvPmM.png",
17
  "jdopensource": "https://cdn-avatars.huggingface.co/v1/production/uploads/68c0e2ab44ea28a974e3074b/g-4gTubd16qUtwmGZ0n4h.png",
18
  "lightonai": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png",
 
13
  "arcee-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6435718aaaef013d1aec3b8b/GZPnGkfMn8Ino6JbkL4fJ.png",
14
  "datalab-to": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ab6afe315e622f597bf9e8/YOgg0gVYVXZC1PDIHFTWK.png",
15
  "deepseek-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png",
16
+ "google": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png",
17
  "infly": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ed9862679c2cc40abb55d2/0n6g0jngiKkRjaEoAvPmM.png",
18
  "jdopensource": "https://cdn-avatars.huggingface.co/v1/production/uploads/68c0e2ab44ea28a974e3074b/g-4gTubd16qUtwmGZ0n4h.png",
19
  "lightonai": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png",
index.html CHANGED
@@ -394,10 +394,10 @@ const LEADERBOARD_DATA =
394
  {
395
  "metadata": {
396
  "version": "1.0.0",
397
- "lastUpdated": "2026-03-16T15:45:23.110813Z",
398
  "title": "Official Benchmarks Leaderboard 2026",
399
  "description": "Unified leaderboard for 12 official Hugging Face benchmarks",
400
- "totalModels": 67,
401
  "totalBenchmarks": 12
402
  },
403
  "benchmarks": {
@@ -1477,69 +1477,6 @@ const LEADERBOARD_DATA =
1477
  "coveragePercent": 58.3,
1478
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png"
1479
  },
1480
- {
1481
- "id": "deepseek-ai-deepseek-v3.2",
1482
- "name": "deepseek-ai/DeepSeek-V3.2",
1483
- "provider": "deepseek-ai",
1484
- "type": "open",
1485
- "released": "2024.01",
1486
- "metadata": {
1487
- "license": "Unknown",
1488
- "parameters": "Unknown",
1489
- "parametersInBillions": 685.4,
1490
- "contextWindow": 0,
1491
- "modality": "text",
1492
- "architecture": "Transformer"
1493
- },
1494
- "benchmarks": {
1495
- "mmluPro": {
1496
- "score": 85.0,
1497
- "confidence": "official",
1498
- "source": "MMLU-Pro API",
1499
- "date": "2026-03-16"
1500
- },
1501
- "gpqa": {
1502
- "score": 82.4,
1503
- "confidence": "official",
1504
- "source": "GPQA Diamond API",
1505
- "date": "2026-03-16"
1506
- },
1507
- "hle": {
1508
- "score": 40.8,
1509
- "confidence": "official",
1510
- "source": "HLE API",
1511
- "date": "2026-03-16"
1512
- },
1513
- "sweVerified": {
1514
- "score": 70.0,
1515
- "confidence": "official",
1516
- "source": "SWE-bench Verified API",
1517
- "date": "2026-03-16"
1518
- },
1519
- "aime2026": {
1520
- "score": 94.17,
1521
- "confidence": "official",
1522
- "source": "AIME 2026 API",
1523
- "date": "2026-03-16"
1524
- },
1525
- "hmmt2026": {
1526
- "score": 84.09,
1527
- "confidence": "official",
1528
- "source": "HMMT Feb 2026 API",
1529
- "date": "2026-03-16"
1530
- },
1531
- "terminalBench": {
1532
- "score": 39.6,
1533
- "confidence": "official",
1534
- "source": "Terminal-Bench 2.0 API",
1535
- "date": "2026-03-16"
1536
- }
1537
- },
1538
- "aggregateScore": 70.87,
1539
- "coverageCount": 7,
1540
- "coveragePercent": 58.3,
1541
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png"
1542
- },
1543
  {
1544
  "id": "zai-org-glm-5",
1545
  "name": "zai-org/GLM-5",
@@ -1684,6 +1621,33 @@ const LEADERBOARD_DATA =
1684
  "coveragePercent": 8.3,
1685
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg"
1686
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1687
  {
1688
  "id": "gair-openswe-72b",
1689
  "name": "GAIR/OpenSWE-72B",
@@ -1744,6 +1708,81 @@ const LEADERBOARD_DATA =
1744
  "coveragePercent": 16.7,
1745
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
1746
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1747
  {
1748
  "id": "qwen-qwen3.5-122b-a10b",
1749
  "name": "Qwen/Qwen3.5-122B-A10B",
@@ -1822,6 +1861,57 @@ const LEADERBOARD_DATA =
1822
  "coveragePercent": 8.3,
1823
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png"
1824
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1825
  {
1826
  "id": "qwen-qwen3.5-27b",
1827
  "name": "Qwen/Qwen3.5-27B",
@@ -2047,51 +2137,6 @@ const LEADERBOARD_DATA =
2047
  "coveragePercent": 33.3,
2048
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
2049
  },
2050
- {
2051
- "id": "zai-org-glm-4.7",
2052
- "name": "zai-org/GLM-4.7",
2053
- "provider": "zai-org",
2054
- "type": "open",
2055
- "released": "2024.01",
2056
- "metadata": {
2057
- "license": "Unknown",
2058
- "parameters": "Unknown",
2059
- "parametersInBillions": 358.3,
2060
- "contextWindow": 0,
2061
- "modality": "text",
2062
- "architecture": "Transformer"
2063
- },
2064
- "benchmarks": {
2065
- "mmluPro": {
2066
- "score": 84.3,
2067
- "confidence": "official",
2068
- "source": "MMLU-Pro API",
2069
- "date": "2026-03-16"
2070
- },
2071
- "gpqa": {
2072
- "score": 85.7,
2073
- "confidence": "official",
2074
- "source": "GPQA Diamond API",
2075
- "date": "2026-03-16"
2076
- },
2077
- "hle": {
2078
- "score": 24.8,
2079
- "confidence": "official",
2080
- "source": "HLE API",
2081
- "date": "2026-03-16"
2082
- },
2083
- "terminalBench": {
2084
- "score": 33.4,
2085
- "confidence": "official",
2086
- "source": "Terminal-Bench 2.0 API",
2087
- "date": "2026-03-16"
2088
- }
2089
- },
2090
- "aggregateScore": 57.05,
2091
- "coverageCount": 4,
2092
- "coveragePercent": 33.3,
2093
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png"
2094
- },
2095
  {
2096
  "id": "qwen-qwen3.5-2b",
2097
  "name": "Qwen/Qwen3.5-2B",
@@ -2243,109 +2288,199 @@ const LEADERBOARD_DATA =
2243
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png"
2244
  },
2245
  {
2246
- "id": "nanbeige-nanbeige4.1-3b",
2247
- "name": "Nanbeige/Nanbeige4.1-3B",
2248
- "provider": "Nanbeige",
2249
  "type": "open",
2250
  "released": "2024.01",
2251
  "metadata": {
2252
  "license": "Unknown",
2253
  "parameters": "Unknown",
2254
- "parametersInBillions": 3.9,
2255
  "contextWindow": 0,
2256
  "modality": "text",
2257
  "architecture": "Transformer"
2258
  },
2259
  "benchmarks": {
2260
- "gpqa": {
2261
- "score": 83.8,
2262
  "confidence": "official",
2263
- "source": "GPQA Diamond API",
2264
  "date": "2026-03-16"
2265
  },
2266
- "hle": {
2267
- "score": 12.6,
2268
  "confidence": "official",
2269
- "source": "HLE API",
2270
  "date": "2026-03-16"
2271
- }
 
 
 
 
 
 
2272
  },
2273
- "aggregateScore": 48.2,
2274
- "coverageCount": 2,
2275
- "coveragePercent": 16.7,
2276
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png"
2277
  },
2278
  {
2279
- "id": "nvidia-nvidia-nemotron-3-nano-30b-a3b-bf16",
2280
- "name": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
2281
- "provider": "nvidia",
2282
  "type": "open",
2283
  "released": "2024.01",
2284
  "metadata": {
2285
  "license": "Unknown",
2286
  "parameters": "Unknown",
2287
- "parametersInBillions": 31.6,
2288
  "contextWindow": 0,
2289
  "modality": "text",
2290
  "architecture": "Transformer"
2291
  },
2292
  "benchmarks": {
2293
  "mmluPro": {
2294
- "score": 78.3,
2295
  "confidence": "official",
2296
  "source": "MMLU-Pro API",
2297
  "date": "2026-03-16"
2298
  },
2299
  "hle": {
2300
- "score": 15.5,
2301
  "confidence": "official",
2302
  "source": "HLE API",
2303
  "date": "2026-03-16"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2304
  }
2305
  },
2306
- "aggregateScore": 46.9,
2307
- "coverageCount": 2,
2308
- "coveragePercent": 16.7,
2309
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png"
2310
  },
2311
  {
2312
- "id": "minimaxai-minimax-m2.1",
2313
- "name": "MiniMaxAI/MiniMax-M2.1",
2314
- "provider": "MiniMaxAI",
2315
  "type": "open",
2316
  "released": "2024.01",
2317
  "metadata": {
2318
  "license": "Unknown",
2319
  "parameters": "Unknown",
2320
- "parametersInBillions": 228.7,
2321
  "contextWindow": 0,
2322
  "modality": "text",
2323
  "architecture": "Transformer"
2324
  },
2325
  "benchmarks": {
2326
- "mmluPro": {
2327
- "score": 88.0,
2328
  "confidence": "official",
2329
- "source": "MMLU-Pro API",
2330
  "date": "2026-03-16"
2331
  },
2332
  "hle": {
2333
- "score": 22.2,
2334
  "confidence": "official",
2335
  "source": "HLE API",
2336
  "date": "2026-03-16"
2337
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2338
  "terminalBench": {
2339
- "score": 29.2,
2340
  "confidence": "official",
2341
  "source": "Terminal-Bench 2.0 API",
2342
  "date": "2026-03-16"
 
 
 
 
 
 
 
 
 
 
 
 
2343
  }
2344
  },
2345
- "aggregateScore": 46.47,
2346
  "coverageCount": 3,
2347
  "coveragePercent": 25.0,
2348
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2349
  },
2350
  {
2351
  "id": "zai-org-glm-4.7-flash",
@@ -2519,97 +2654,103 @@ const LEADERBOARD_DATA =
2519
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg"
2520
  },
2521
  {
2522
- "id": "openai-gpt-oss-120b",
2523
- "name": "openai/gpt-oss-120b",
2524
- "provider": "openai",
2525
  "type": "open",
2526
  "released": "2024.01",
2527
  "metadata": {
2528
  "license": "Unknown",
2529
  "parameters": "Unknown",
2530
- "parametersInBillions": 120.4,
2531
  "contextWindow": 0,
2532
  "modality": "text",
2533
  "architecture": "Transformer"
2534
  },
2535
  "benchmarks": {
2536
- "gpqa": {
2537
- "score": 67.1,
2538
- "confidence": "official",
2539
- "source": "GPQA Diamond API",
2540
- "date": "2026-03-16"
2541
- },
2542
  "hle": {
2543
- "score": 5.2,
2544
  "confidence": "official",
2545
  "source": "HLE API",
2546
  "date": "2026-03-16"
2547
- },
2548
- "sweVerified": {
2549
- "score": 47.9,
2550
- "confidence": "official",
2551
- "source": "SWE-bench Verified API",
2552
- "date": "2026-03-16"
2553
  }
2554
  },
2555
- "aggregateScore": 40.07,
2556
- "coverageCount": 3,
2557
- "coveragePercent": 25.0,
2558
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png"
2559
  },
2560
  {
2561
- "id": "miromind-ai-mirothinker-v1.5-235b",
2562
- "name": "miromind-ai/MiroThinker-v1.5-235B",
2563
- "provider": "miromind-ai",
2564
  "type": "open",
2565
  "released": "2024.01",
2566
  "metadata": {
2567
  "license": "Unknown",
2568
  "parameters": "Unknown",
2569
- "parametersInBillions": 235.0,
2570
  "contextWindow": 0,
2571
  "modality": "text",
2572
  "architecture": "Transformer"
2573
  },
2574
  "benchmarks": {
2575
  "hle": {
2576
- "score": 39.2,
2577
  "confidence": "official",
2578
  "source": "HLE API",
2579
  "date": "2026-03-16"
2580
  }
2581
  },
2582
- "aggregateScore": 39.2,
2583
  "coverageCount": 1,
2584
  "coveragePercent": 8.3,
2585
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png"
2586
  },
2587
  {
2588
- "id": "nvidia-nemotron-orchestrator-8b",
2589
- "name": "nvidia/Nemotron-Orchestrator-8B",
2590
- "provider": "nvidia",
2591
  "type": "open",
2592
  "released": "2024.01",
2593
  "metadata": {
2594
  "license": "Unknown",
2595
  "parameters": "Unknown",
2596
- "parametersInBillions": 8.0,
2597
  "contextWindow": 0,
2598
  "modality": "text",
2599
  "architecture": "Transformer"
2600
  },
2601
  "benchmarks": {
 
 
 
 
 
 
2602
  "hle": {
2603
- "score": 37.1,
2604
  "confidence": "official",
2605
  "source": "HLE API",
2606
  "date": "2026-03-16"
 
 
 
 
 
 
 
 
 
 
 
 
2607
  }
2608
  },
2609
- "aggregateScore": 37.1,
2610
- "coverageCount": 1,
2611
- "coveragePercent": 8.3,
2612
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png"
2613
  },
2614
  {
2615
  "id": "openai-gpt-oss-20b",
@@ -2697,94 +2838,73 @@ const LEADERBOARD_DATA =
2697
  "confidence": "official",
2698
  "source": "Terminal-Bench 2.0 API",
2699
  "date": "2026-03-16"
 
 
 
 
 
 
2700
  }
2701
  },
2702
- "aggregateScore": 27.8,
2703
- "coverageCount": 1,
2704
- "coveragePercent": 8.3,
2705
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
2706
  },
2707
  {
2708
- "id": "zai-org-glm-4.6",
2709
- "name": "zai-org/GLM-4.6",
2710
- "provider": "zai-org",
2711
  "type": "open",
2712
  "released": "2024.01",
2713
  "metadata": {
2714
  "license": "Unknown",
2715
  "parameters": "Unknown",
2716
- "parametersInBillions": 356.8,
2717
  "contextWindow": 0,
2718
  "modality": "text",
2719
  "architecture": "Transformer"
2720
  },
2721
  "benchmarks": {
2722
- "terminalBench": {
2723
- "score": 24.5,
2724
  "confidence": "official",
2725
- "source": "Terminal-Bench 2.0 API",
2726
  "date": "2026-03-16"
2727
  }
2728
  },
2729
- "aggregateScore": 24.5,
2730
  "coverageCount": 1,
2731
  "coveragePercent": 8.3,
2732
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png"
2733
  },
2734
  {
2735
- "id": "qwen-qwen3-coder-480b-a35b-instruct",
2736
- "name": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
2737
  "provider": "Qwen",
2738
  "type": "open",
2739
  "released": "2024.01",
2740
  "metadata": {
2741
  "license": "Unknown",
2742
  "parameters": "Unknown",
2743
- "parametersInBillions": 480.2,
2744
  "contextWindow": 0,
2745
  "modality": "text",
2746
  "architecture": "Transformer"
2747
  },
2748
  "benchmarks": {
2749
- "terminalBench": {
2750
- "score": 23.9,
2751
  "confidence": "official",
2752
- "source": "Terminal-Bench 2.0 API",
2753
  "date": "2026-03-16"
2754
  }
2755
  },
2756
- "aggregateScore": 23.9,
2757
  "coverageCount": 1,
2758
  "coveragePercent": 8.3,
2759
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
2760
  },
2761
- {
2762
- "id": "xiaomimimo-mimo-v2-flash",
2763
- "name": "XiaomiMiMo/MiMo-V2-Flash",
2764
- "provider": "XiaomiMiMo",
2765
- "type": "open",
2766
- "released": "2024.01",
2767
- "metadata": {
2768
- "license": "Unknown",
2769
- "parameters": "Unknown",
2770
- "parametersInBillions": 309.8,
2771
- "contextWindow": 0,
2772
- "modality": "text",
2773
- "architecture": "Transformer"
2774
- },
2775
- "benchmarks": {
2776
- "hle": {
2777
- "score": 22.1,
2778
- "confidence": "official",
2779
- "source": "HLE API",
2780
- "date": "2026-03-16"
2781
- }
2782
- },
2783
- "aggregateScore": 22.1,
2784
- "coverageCount": 1,
2785
- "coveragePercent": 8.3,
2786
- "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg"
2787
- },
2788
  {
2789
  "id": "qwen-qwen3.5-0.8b",
2790
  "name": "Qwen/Qwen3.5-0.8B",
@@ -2845,6 +2965,93 @@ const LEADERBOARD_DATA =
2845
  "coveragePercent": 8.3,
2846
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png"
2847
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2848
  {
2849
  "id": "helpingai-dhanishtha-2.0-0126",
2850
  "name": "HelpingAI/Dhanishtha-2.0-0126",
@@ -2871,6 +3078,33 @@ const LEADERBOARD_DATA =
2871
  "coverageCount": 1,
2872
  "coveragePercent": 8.3,
2873
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2874
  }
2875
  ]
2876
  }
 
394
  {
395
  "metadata": {
396
  "version": "1.0.0",
397
+ "lastUpdated": "2026-03-16T16:20:45.100745Z",
398
  "title": "Official Benchmarks Leaderboard 2026",
399
  "description": "Unified leaderboard for 12 official Hugging Face benchmarks",
400
+ "totalModels": 73,
401
  "totalBenchmarks": 12
402
  },
403
  "benchmarks": {
 
1477
  "coveragePercent": 58.3,
1478
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png"
1479
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1480
  {
1481
  "id": "zai-org-glm-5",
1482
  "name": "zai-org/GLM-5",
 
1621
  "coveragePercent": 8.3,
1622
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg"
1623
  },
1624
+ {
1625
+ "id": "moonshotai-kimi-k2-instruct-0905",
1626
+ "name": "moonshotai/Kimi-K2-Instruct-0905",
1627
+ "provider": "moonshotai",
1628
+ "type": "open",
1629
+ "released": "2024.01",
1630
+ "metadata": {
1631
+ "license": "Unknown",
1632
+ "parameters": "Unknown",
1633
+ "parametersInBillions": 1026.5,
1634
+ "contextWindow": 0,
1635
+ "modality": "text",
1636
+ "architecture": "Transformer"
1637
+ },
1638
+ "benchmarks": {
1639
+ "evasionBench": {
1640
+ "score": 66.68,
1641
+ "confidence": "official",
1642
+ "source": "EvasionBench API",
1643
+ "date": "2026-03-16"
1644
+ }
1645
+ },
1646
+ "aggregateScore": 66.68,
1647
+ "coverageCount": 1,
1648
+ "coveragePercent": 8.3,
1649
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
1650
+ },
1651
  {
1652
  "id": "gair-openswe-72b",
1653
  "name": "GAIR/OpenSWE-72B",
 
1708
  "coveragePercent": 16.7,
1709
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
1710
  },
1711
+ {
1712
+ "id": "deepseek-ai-deepseek-v3.2",
1713
+ "name": "deepseek-ai/DeepSeek-V3.2",
1714
+ "provider": "deepseek-ai",
1715
+ "type": "open",
1716
+ "released": "2024.01",
1717
+ "metadata": {
1718
+ "license": "Unknown",
1719
+ "parameters": "Unknown",
1720
+ "parametersInBillions": 685.4,
1721
+ "contextWindow": 0,
1722
+ "modality": "text",
1723
+ "architecture": "Transformer"
1724
+ },
1725
+ "benchmarks": {
1726
+ "mmluPro": {
1727
+ "score": 85.0,
1728
+ "confidence": "official",
1729
+ "source": "MMLU-Pro API",
1730
+ "date": "2026-03-16"
1731
+ },
1732
+ "gpqa": {
1733
+ "score": 82.4,
1734
+ "confidence": "official",
1735
+ "source": "GPQA Diamond API",
1736
+ "date": "2026-03-16"
1737
+ },
1738
+ "hle": {
1739
+ "score": 40.8,
1740
+ "confidence": "official",
1741
+ "source": "HLE API",
1742
+ "date": "2026-03-16"
1743
+ },
1744
+ "sweVerified": {
1745
+ "score": 70.0,
1746
+ "confidence": "official",
1747
+ "source": "SWE-bench Verified API",
1748
+ "date": "2026-03-16"
1749
+ },
1750
+ "aime2026": {
1751
+ "score": 94.17,
1752
+ "confidence": "official",
1753
+ "source": "AIME 2026 API",
1754
+ "date": "2026-03-16"
1755
+ },
1756
+ "hmmt2026": {
1757
+ "score": 84.09,
1758
+ "confidence": "official",
1759
+ "source": "HMMT Feb 2026 API",
1760
+ "date": "2026-03-16"
1761
+ },
1762
+ "terminalBench": {
1763
+ "score": 39.6,
1764
+ "confidence": "official",
1765
+ "source": "Terminal-Bench 2.0 API",
1766
+ "date": "2026-03-16"
1767
+ },
1768
+ "swePro": {
1769
+ "score": 15.56,
1770
+ "confidence": "official",
1771
+ "source": "SWE-bench Pro API",
1772
+ "date": "2026-03-16"
1773
+ },
1774
+ "evasionBench": {
1775
+ "score": 66.88,
1776
+ "confidence": "official",
1777
+ "source": "EvasionBench API",
1778
+ "date": "2026-03-16"
1779
+ }
1780
+ },
1781
+ "aggregateScore": 64.28,
1782
+ "coverageCount": 9,
1783
+ "coveragePercent": 75.0,
1784
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png"
1785
+ },
1786
  {
1787
  "id": "qwen-qwen3.5-122b-a10b",
1788
  "name": "Qwen/Qwen3.5-122B-A10B",
 
1861
  "coveragePercent": 8.3,
1862
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png"
1863
  },
1864
+ {
1865
+ "id": "zai-org-glm-4.7",
1866
+ "name": "zai-org/GLM-4.7",
1867
+ "provider": "zai-org",
1868
+ "type": "open",
1869
+ "released": "2024.01",
1870
+ "metadata": {
1871
+ "license": "Unknown",
1872
+ "parameters": "Unknown",
1873
+ "parametersInBillions": 358.3,
1874
+ "contextWindow": 0,
1875
+ "modality": "text",
1876
+ "architecture": "Transformer"
1877
+ },
1878
+ "benchmarks": {
1879
+ "mmluPro": {
1880
+ "score": 84.3,
1881
+ "confidence": "official",
1882
+ "source": "MMLU-Pro API",
1883
+ "date": "2026-03-16"
1884
+ },
1885
+ "gpqa": {
1886
+ "score": 85.7,
1887
+ "confidence": "official",
1888
+ "source": "GPQA Diamond API",
1889
+ "date": "2026-03-16"
1890
+ },
1891
+ "hle": {
1892
+ "score": 24.8,
1893
+ "confidence": "official",
1894
+ "source": "HLE API",
1895
+ "date": "2026-03-16"
1896
+ },
1897
+ "terminalBench": {
1898
+ "score": 33.4,
1899
+ "confidence": "official",
1900
+ "source": "Terminal-Bench 2.0 API",
1901
+ "date": "2026-03-16"
1902
+ },
1903
+ "evasionBench": {
1904
+ "score": 82.91,
1905
+ "confidence": "official",
1906
+ "source": "EvasionBench API",
1907
+ "date": "2026-03-16"
1908
+ }
1909
+ },
1910
+ "aggregateScore": 62.22,
1911
+ "coverageCount": 5,
1912
+ "coveragePercent": 41.7,
1913
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png"
1914
+ },
1915
  {
1916
  "id": "qwen-qwen3.5-27b",
1917
  "name": "Qwen/Qwen3.5-27B",
 
2137
  "coveragePercent": 33.3,
2138
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
2139
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2140
  {
2141
  "id": "qwen-qwen3.5-2b",
2142
  "name": "Qwen/Qwen3.5-2B",
 
2288
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png"
2289
  },
2290
  {
2291
+ "id": "qwen-qwen3-coder-next",
2292
+ "name": "Qwen/Qwen3-Coder-Next",
2293
+ "provider": "Qwen",
2294
  "type": "open",
2295
  "released": "2024.01",
2296
  "metadata": {
2297
  "license": "Unknown",
2298
  "parameters": "Unknown",
2299
+ "parametersInBillions": 79.7,
2300
  "contextWindow": 0,
2301
  "modality": "text",
2302
  "architecture": "Transformer"
2303
  },
2304
  "benchmarks": {
2305
+ "sweVerified": {
2306
+ "score": 70.6,
2307
  "confidence": "official",
2308
+ "source": "SWE-bench Verified API",
2309
  "date": "2026-03-16"
2310
  },
2311
+ "terminalBench": {
2312
+ "score": 36.2,
2313
  "confidence": "official",
2314
+ "source": "Terminal-Bench 2.0 API",
2315
  "date": "2026-03-16"
2316
+ },
2317
+ "swePro": {
2318
+ "score": 44.3,
2319
+ "confidence": "official",
2320
+ "source": "SWE-bench Pro API",
2321
+ "date": "2026-03-16"
2322
+ }
2323
  },
2324
+ "aggregateScore": 50.37,
2325
+ "coverageCount": 3,
2326
+ "coveragePercent": 25.0,
2327
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
2328
  },
2329
  {
2330
+ "id": "minimaxai-minimax-m2.1",
2331
+ "name": "MiniMaxAI/MiniMax-M2.1",
2332
+ "provider": "MiniMaxAI",
2333
  "type": "open",
2334
  "released": "2024.01",
2335
  "metadata": {
2336
  "license": "Unknown",
2337
  "parameters": "Unknown",
2338
+ "parametersInBillions": 228.7,
2339
  "contextWindow": 0,
2340
  "modality": "text",
2341
  "architecture": "Transformer"
2342
  },
2343
  "benchmarks": {
2344
  "mmluPro": {
2345
+ "score": 88.0,
2346
  "confidence": "official",
2347
  "source": "MMLU-Pro API",
2348
  "date": "2026-03-16"
2349
  },
2350
  "hle": {
2351
+ "score": 22.2,
2352
  "confidence": "official",
2353
  "source": "HLE API",
2354
  "date": "2026-03-16"
2355
+ },
2356
+ "terminalBench": {
2357
+ "score": 29.2,
2358
+ "confidence": "official",
2359
+ "source": "Terminal-Bench 2.0 API",
2360
+ "date": "2026-03-16"
2361
+ },
2362
+ "swePro": {
2363
+ "score": 36.81,
2364
+ "confidence": "official",
2365
+ "source": "SWE-bench Pro API",
2366
+ "date": "2026-03-16"
2367
+ },
2368
+ "evasionBench": {
2369
+ "score": 71.31,
2370
+ "confidence": "official",
2371
+ "source": "EvasionBench API",
2372
+ "date": "2026-03-16"
2373
  }
2374
  },
2375
+ "aggregateScore": 49.5,
2376
+ "coverageCount": 5,
2377
+ "coveragePercent": 41.7,
2378
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg"
2379
  },
2380
  {
2381
+ "id": "nanbeige-nanbeige4.1-3b",
2382
+ "name": "Nanbeige/Nanbeige4.1-3B",
2383
+ "provider": "Nanbeige",
2384
  "type": "open",
2385
  "released": "2024.01",
2386
  "metadata": {
2387
  "license": "Unknown",
2388
  "parameters": "Unknown",
2389
+ "parametersInBillions": 3.9,
2390
  "contextWindow": 0,
2391
  "modality": "text",
2392
  "architecture": "Transformer"
2393
  },
2394
  "benchmarks": {
2395
+ "gpqa": {
2396
+ "score": 83.8,
2397
  "confidence": "official",
2398
+ "source": "GPQA Diamond API",
2399
  "date": "2026-03-16"
2400
  },
2401
  "hle": {
2402
+ "score": 12.6,
2403
  "confidence": "official",
2404
  "source": "HLE API",
2405
  "date": "2026-03-16"
2406
+ }
2407
+ },
2408
+ "aggregateScore": 48.2,
2409
+ "coverageCount": 2,
2410
+ "coveragePercent": 16.7,
2411
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png"
2412
+ },
2413
+ {
2414
+ "id": "qwen-qwen3-coder-480b-a35b-instruct",
2415
+ "name": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
2416
+ "provider": "Qwen",
2417
+ "type": "open",
2418
+ "released": "2024.01",
2419
+ "metadata": {
2420
+ "license": "Unknown",
2421
+ "parameters": "Unknown",
2422
+ "parametersInBillions": 480.2,
2423
+ "contextWindow": 0,
2424
+ "modality": "text",
2425
+ "architecture": "Transformer"
2426
+ },
2427
+ "benchmarks": {
2428
  "terminalBench": {
2429
+ "score": 23.9,
2430
  "confidence": "official",
2431
  "source": "Terminal-Bench 2.0 API",
2432
  "date": "2026-03-16"
2433
+ },
2434
+ "swePro": {
2435
+ "score": 38.7,
2436
+ "confidence": "official",
2437
+ "source": "SWE-bench Pro API",
2438
+ "date": "2026-03-16"
2439
+ },
2440
+ "evasionBench": {
2441
+ "score": 78.16,
2442
+ "confidence": "official",
2443
+ "source": "EvasionBench API",
2444
+ "date": "2026-03-16"
2445
  }
2446
  },
2447
+ "aggregateScore": 46.92,
2448
  "coverageCount": 3,
2449
  "coveragePercent": 25.0,
2450
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
2451
+ },
2452
+ {
2453
+ "id": "nvidia-nvidia-nemotron-3-nano-30b-a3b-bf16",
2454
+ "name": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
2455
+ "provider": "nvidia",
2456
+ "type": "open",
2457
+ "released": "2024.01",
2458
+ "metadata": {
2459
+ "license": "Unknown",
2460
+ "parameters": "Unknown",
2461
+ "parametersInBillions": 31.6,
2462
+ "contextWindow": 0,
2463
+ "modality": "text",
2464
+ "architecture": "Transformer"
2465
+ },
2466
+ "benchmarks": {
2467
+ "mmluPro": {
2468
+ "score": 78.3,
2469
+ "confidence": "official",
2470
+ "source": "MMLU-Pro API",
2471
+ "date": "2026-03-16"
2472
+ },
2473
+ "hle": {
2474
+ "score": 15.5,
2475
+ "confidence": "official",
2476
+ "source": "HLE API",
2477
+ "date": "2026-03-16"
2478
+ }
2479
+ },
2480
+ "aggregateScore": 46.9,
2481
+ "coverageCount": 2,
2482
+ "coveragePercent": 16.7,
2483
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png"
2484
  },
2485
  {
2486
  "id": "zai-org-glm-4.7-flash",
 
2654
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg"
2655
  },
2656
  {
2657
+ "id": "miromind-ai-mirothinker-v1.5-235b",
2658
+ "name": "miromind-ai/MiroThinker-v1.5-235B",
2659
+ "provider": "miromind-ai",
2660
  "type": "open",
2661
  "released": "2024.01",
2662
  "metadata": {
2663
  "license": "Unknown",
2664
  "parameters": "Unknown",
2665
+ "parametersInBillions": 235.0,
2666
  "contextWindow": 0,
2667
  "modality": "text",
2668
  "architecture": "Transformer"
2669
  },
2670
  "benchmarks": {
 
 
 
 
 
 
2671
  "hle": {
2672
+ "score": 39.2,
2673
  "confidence": "official",
2674
  "source": "HLE API",
2675
  "date": "2026-03-16"
 
 
 
 
 
 
2676
  }
2677
  },
2678
+ "aggregateScore": 39.2,
2679
+ "coverageCount": 1,
2680
+ "coveragePercent": 8.3,
2681
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png"
2682
  },
2683
  {
2684
+ "id": "nvidia-nemotron-orchestrator-8b",
2685
+ "name": "nvidia/Nemotron-Orchestrator-8B",
2686
+ "provider": "nvidia",
2687
  "type": "open",
2688
  "released": "2024.01",
2689
  "metadata": {
2690
  "license": "Unknown",
2691
  "parameters": "Unknown",
2692
+ "parametersInBillions": 8.0,
2693
  "contextWindow": 0,
2694
  "modality": "text",
2695
  "architecture": "Transformer"
2696
  },
2697
  "benchmarks": {
2698
  "hle": {
2699
+ "score": 37.1,
2700
  "confidence": "official",
2701
  "source": "HLE API",
2702
  "date": "2026-03-16"
2703
  }
2704
  },
2705
+ "aggregateScore": 37.1,
2706
  "coverageCount": 1,
2707
  "coveragePercent": 8.3,
2708
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png"
2709
  },
2710
  {
2711
+ "id": "openai-gpt-oss-120b",
2712
+ "name": "openai/gpt-oss-120b",
2713
+ "provider": "openai",
2714
  "type": "open",
2715
  "released": "2024.01",
2716
  "metadata": {
2717
  "license": "Unknown",
2718
  "parameters": "Unknown",
2719
+ "parametersInBillions": 120.4,
2720
  "contextWindow": 0,
2721
  "modality": "text",
2722
  "architecture": "Transformer"
2723
  },
2724
  "benchmarks": {
2725
+ "gpqa": {
2726
+ "score": 67.1,
2727
+ "confidence": "official",
2728
+ "source": "GPQA Diamond API",
2729
+ "date": "2026-03-16"
2730
+ },
2731
  "hle": {
2732
+ "score": 5.2,
2733
  "confidence": "official",
2734
  "source": "HLE API",
2735
  "date": "2026-03-16"
2736
+ },
2737
+ "sweVerified": {
2738
+ "score": 47.9,
2739
+ "confidence": "official",
2740
+ "source": "SWE-bench Verified API",
2741
+ "date": "2026-03-16"
2742
+ },
2743
+ "swePro": {
2744
+ "score": 16.2,
2745
+ "confidence": "official",
2746
+ "source": "SWE-bench Pro API",
2747
+ "date": "2026-03-16"
2748
  }
2749
  },
2750
+ "aggregateScore": 34.1,
2751
+ "coverageCount": 4,
2752
+ "coveragePercent": 33.3,
2753
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png"
2754
  },
2755
  {
2756
  "id": "openai-gpt-oss-20b",
 
2838
  "confidence": "official",
2839
  "source": "Terminal-Bench 2.0 API",
2840
  "date": "2026-03-16"
2841
+ },
2842
+ "swePro": {
2843
+ "score": 27.67,
2844
+ "confidence": "official",
2845
+ "source": "SWE-bench Pro API",
2846
+ "date": "2026-03-16"
2847
  }
2848
  },
2849
+ "aggregateScore": 27.73,
2850
+ "coverageCount": 2,
2851
+ "coveragePercent": 16.7,
2852
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
2853
  },
2854
  {
2855
+ "id": "xiaomimimo-mimo-v2-flash",
2856
+ "name": "XiaomiMiMo/MiMo-V2-Flash",
2857
+ "provider": "XiaomiMiMo",
2858
  "type": "open",
2859
  "released": "2024.01",
2860
  "metadata": {
2861
  "license": "Unknown",
2862
  "parameters": "Unknown",
2863
+ "parametersInBillions": 309.8,
2864
  "contextWindow": 0,
2865
  "modality": "text",
2866
  "architecture": "Transformer"
2867
  },
2868
  "benchmarks": {
2869
+ "hle": {
2870
+ "score": 22.1,
2871
  "confidence": "official",
2872
+ "source": "HLE API",
2873
  "date": "2026-03-16"
2874
  }
2875
  },
2876
+ "aggregateScore": 22.1,
2877
  "coverageCount": 1,
2878
  "coveragePercent": 8.3,
2879
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg"
2880
  },
2881
  {
2882
+ "id": "qwen-qwen3-235b-a22b",
2883
+ "name": "Qwen/Qwen3-235B-A22B",
2884
  "provider": "Qwen",
2885
  "type": "open",
2886
  "released": "2024.01",
2887
  "metadata": {
2888
  "license": "Unknown",
2889
  "parameters": "Unknown",
2890
+ "parametersInBillions": 235.1,
2891
  "contextWindow": 0,
2892
  "modality": "text",
2893
  "architecture": "Transformer"
2894
  },
2895
  "benchmarks": {
2896
+ "swePro": {
2897
+ "score": 21.41,
2898
  "confidence": "official",
2899
+ "source": "SWE-bench Pro API",
2900
  "date": "2026-03-16"
2901
  }
2902
  },
2903
+ "aggregateScore": 21.41,
2904
  "coverageCount": 1,
2905
  "coveragePercent": 8.3,
2906
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png"
2907
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2908
  {
2909
  "id": "qwen-qwen3.5-0.8b",
2910
  "name": "Qwen/Qwen3.5-0.8B",
 
2965
  "coveragePercent": 8.3,
2966
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png"
2967
  },
2968
+ {
2969
+ "id": "zai-org-glm-4.6",
2970
+ "name": "zai-org/GLM-4.6",
2971
+ "provider": "zai-org",
2972
+ "type": "open",
2973
+ "released": "2024.01",
2974
+ "metadata": {
2975
+ "license": "Unknown",
2976
+ "parameters": "Unknown",
2977
+ "parametersInBillions": 356.8,
2978
+ "contextWindow": 0,
2979
+ "modality": "text",
2980
+ "architecture": "Transformer"
2981
+ },
2982
+ "benchmarks": {
2983
+ "terminalBench": {
2984
+ "score": 24.5,
2985
+ "confidence": "official",
2986
+ "source": "Terminal-Bench 2.0 API",
2987
+ "date": "2026-03-16"
2988
+ },
2989
+ "swePro": {
2990
+ "score": 9.67,
2991
+ "confidence": "official",
2992
+ "source": "SWE-bench Pro API",
2993
+ "date": "2026-03-16"
2994
+ }
2995
+ },
2996
+ "aggregateScore": 17.09,
2997
+ "coverageCount": 2,
2998
+ "coveragePercent": 16.7,
2999
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png"
3000
+ },
3001
+ {
3002
+ "id": "google-gemma-3-27b-it",
3003
+ "name": "google/gemma-3-27b-it",
3004
+ "provider": "google",
3005
+ "type": "open",
3006
+ "released": "2024.01",
3007
+ "metadata": {
3008
+ "license": "Unknown",
3009
+ "parameters": "Unknown",
3010
+ "parametersInBillions": 27.4,
3011
+ "contextWindow": 0,
3012
+ "modality": "text",
3013
+ "architecture": "Transformer"
3014
+ },
3015
+ "benchmarks": {
3016
+ "swePro": {
3017
+ "score": 11.38,
3018
+ "confidence": "official",
3019
+ "source": "SWE-bench Pro API",
3020
+ "date": "2026-03-16"
3021
+ }
3022
+ },
3023
+ "aggregateScore": 11.38,
3024
+ "coverageCount": 1,
3025
+ "coveragePercent": 8.3,
3026
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png"
3027
+ },
3028
+ {
3029
+ "id": "meta-llama-llama-3.1-405b-instruct",
3030
+ "name": "meta-llama/Llama-3.1-405B-Instruct",
3031
+ "provider": "meta-llama",
3032
+ "type": "open",
3033
+ "released": "2024.01",
3034
+ "metadata": {
3035
+ "license": "Unknown",
3036
+ "parameters": "Unknown",
3037
+ "parametersInBillions": 405.9,
3038
+ "contextWindow": 0,
3039
+ "modality": "text",
3040
+ "architecture": "Transformer"
3041
+ },
3042
+ "benchmarks": {
3043
+ "swePro": {
3044
+ "score": 11.18,
3045
+ "confidence": "official",
3046
+ "source": "SWE-bench Pro API",
3047
+ "date": "2026-03-16"
3048
+ }
3049
+ },
3050
+ "aggregateScore": 11.18,
3051
+ "coverageCount": 1,
3052
+ "coveragePercent": 8.3,
3053
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png"
3054
+ },
3055
  {
3056
  "id": "helpingai-dhanishtha-2.0-0126",
3057
  "name": "HelpingAI/Dhanishtha-2.0-0126",
 
3078
  "coverageCount": 1,
3079
  "coveragePercent": 8.3,
3080
  "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png"
3081
+ },
3082
+ {
3083
+ "id": "meta-llama-llama-4-maverick-17b-128e-instruct",
3084
+ "name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
3085
+ "provider": "meta-llama",
3086
+ "type": "open",
3087
+ "released": "2024.01",
3088
+ "metadata": {
3089
+ "license": "Unknown",
3090
+ "parameters": "Unknown",
3091
+ "parametersInBillions": 401.6,
3092
+ "contextWindow": 0,
3093
+ "modality": "text",
3094
+ "architecture": "Transformer"
3095
+ },
3096
+ "benchmarks": {
3097
+ "swePro": {
3098
+ "score": 5.24,
3099
+ "confidence": "official",
3100
+ "source": "SWE-bench Pro API",
3101
+ "date": "2026-03-16"
3102
+ }
3103
+ },
3104
+ "aggregateScore": 5.24,
3105
+ "coverageCount": 1,
3106
+ "coveragePercent": 8.3,
3107
+ "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png"
3108
  }
3109
  ]
3110
  }
scripts/fetch_api_only.py CHANGED
@@ -63,6 +63,18 @@ BENCHMARK_CONFIGS = [
63
  "name": "Terminal-Bench 2.0",
64
  "gated": False,
65
  },
 
 
 
 
 
 
 
 
 
 
 
 
66
  ]
67
 
68
 
 
63
  "name": "Terminal-Bench 2.0",
64
  "gated": False,
65
  },
66
+ {
67
+ "dataset": "ScaleAI/SWE-bench_Pro",
68
+ "key": "swePro",
69
+ "name": "SWE-bench Pro",
70
+ "gated": False,
71
+ },
72
+ {
73
+ "dataset": "FutureMa/EvasionBench",
74
+ "key": "evasionBench",
75
+ "name": "EvasionBench",
76
+ "gated": False,
77
+ },
78
  ]
79
 
80