Spaces:
Running
Running
File size: 8,152 Bytes
fae4e5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
"""
TraceMind-AI - Agent Evaluation Platform
MCP Client consuming TraceMind-mcp-server for intelligent analysis
"""
import os
import gradio as gr
from dotenv import load_dotenv
import pandas as pd
# Load environment variables
load_dotenv()
# Import utilities
from utils.auth import is_authenticated, get_user_info, create_login_button, create_user_info_display, DEV_MODE
from utils.navigation import Navigator, Screen
from data_loader import create_data_loader_from_env
from styles.tracemind_theme import get_tracemind_css
from mcp_client.sync_wrapper import get_sync_mcp_client
# Initialize
# Module-level singletons created once at import time and shared by the
# handler functions defined below.
# Leaderboard data source; presumably configured from environment variables
# (see data_loader.create_data_loader_from_env) - TODO confirm.
data_loader = create_data_loader_from_env()
# NOTE(review): `navigator` is not referenced anywhere else in the visible
# part of this file.
navigator = Navigator()
# Client whose analyze_leaderboard / estimate_cost / initialize methods are
# called by the handlers below.
mcp_client = get_sync_mcp_client()
# Global state
# Starts unset; nothing in the visible code reads or assigns it afterwards.
current_selected_run = None
def load_leaderboard_view(token, profile):
    """Load the leaderboard table together with MCP-powered insights.

    Args:
        token: Credential forwarded unchanged to ``is_authenticated``.
        profile: User profile forwarded unchanged to ``is_authenticated``.

    Returns:
        A ``(table, insights)`` pair feeding the leaderboard ``gr.Dataframe``
        and the insights ``gr.Markdown``. ``table`` is always a
        ``pandas.DataFrame`` (empty when there is nothing to show) and every
        status or error message travels in ``insights``.
    """
    display_columns = [
        'model', 'agent_type', 'success_rate', 'total_tests',
        'avg_duration_ms', 'total_cost_usd', 'co2_emissions_g',
    ]
    # Decimal places kept per numeric column in the displayed table.
    rounding = {
        'success_rate': 1,
        'avg_duration_ms': 0,
        'total_cost_usd': 4,
        'co2_emissions_g': 2,
    }

    def _message_only(message):
        # Gradio's Dataframe treats a plain string value as a path to a CSV
        # file, so a message must never be returned in the table slot. Hand
        # back an empty table and surface the text in the Markdown panel.
        return pd.DataFrame(columns=display_columns), message

    if not is_authenticated(token, profile):
        return _message_only("Please log in to view the leaderboard")

    try:
        # Load real data from HuggingFace
        leaderboard_df = data_loader.load_leaderboard()
        if leaderboard_df.empty:
            return _message_only("No evaluation runs found in the leaderboard")

        # Format dataframe for display
        display_df = leaderboard_df[display_columns].copy()
        for column, digits in rounding.items():
            display_df[column] = display_df[column].round(digits)
    except Exception as e:
        return _message_only(f"Error loading leaderboard: {e}")

    # Insights are best-effort: a failing MCP call must not hide the table.
    try:
        insights = mcp_client.analyze_leaderboard(
            metric_focus="overall",
            time_range="all_time",
            top_n=5,
            hf_token=os.getenv('HF_TOKEN'),
            gemini_api_key=os.getenv('GEMINI_API_KEY'),
        )
    except Exception as e:
        insights = f"⚠️ MCP analysis unavailable: {e}\n\n(Server may need initialization)"

    return display_df, insights
def estimate_evaluation_cost(model, agent_type, num_tests):
    """Estimate the cost of a new evaluation run via the MCP server.

    Args:
        model: Model identifier passed through to ``mcp_client.estimate_cost``.
        agent_type: Agent type passed through to ``mcp_client.estimate_cost``.
        num_tests: Requested number of tests; converted with ``int()``.

    Returns:
        Whatever ``mcp_client.estimate_cost`` returns on success, otherwise a
        string starting with "❌ Error estimating cost:" describing the
        problem. This function never raises for ordinary failures.
    """
    # Validate the count before contacting the server: an empty number field
    # may arrive as None, which would otherwise surface as a raw int()
    # TypeError message in the UI.
    try:
        test_count = int(num_tests)
    except (TypeError, ValueError, OverflowError):
        return "❌ Error estimating cost: Number of Tests must be a whole number"
    if test_count < 1:
        return "❌ Error estimating cost: Number of Tests must be at least 1"

    try:
        return mcp_client.estimate_cost(
            model=model,
            agent_type=agent_type,
            num_tests=test_count,
            hf_token=os.getenv('HF_TOKEN'),
            gemini_api_key=os.getenv('GEMINI_API_KEY'),
        )
    except Exception as e:
        return f"❌ Error estimating cost: {e}"
def build_ui():
    """Build the Gradio UI.

    Lays out the header, the login row and three tabs (leaderboard, cost
    estimator, MCP status), then wires the button click handlers.

    Returns:
        The assembled ``gr.Blocks`` application; it is not launched here.
    """
    with gr.Blocks(css=get_tracemind_css(), title="TraceMind-AI") as demo:
        # Header
        gr.Markdown(
            "# 🔍 TraceMind-AI\n"
            "### Agent Evaluation Platform with MCP-Powered Intelligence\n"
            "**Powered by:**\n"
            "- 📊 Real data from HuggingFace datasets\n"
            "- 🤖 MCP Server for AI-powered insights ([TraceMind-mcp-server](https://huggingface.co/spaces/kshitijthakkar/TraceMind-mcp-server))\n"
            "- 🧠 Google Gemini 2.5 Flash for analysis\n"
        )

        # Authentication
        with gr.Row():
            with gr.Column(scale=2):
                user_display = gr.HTML(create_user_info_display(None))
            with gr.Column(scale=1):
                login_btn = create_login_button()

        # Main content: hidden until login unless DEV_MODE is truthy.
        with gr.Column(visible=DEV_MODE) as main_content:
            with gr.Tabs() as tabs:
                # Tab 1: Leaderboard
                with gr.Tab("📊 Leaderboard"):
                    gr.Markdown("### Agent Evaluation Leaderboard")
                    gr.Markdown("Real-time data from `kshitijthakkar/smoltrace-leaderboard`")
                    load_leaderboard_btn = gr.Button("🔄 Load Leaderboard", variant="primary")
                    with gr.Row():
                        with gr.Column(scale=2):
                            leaderboard_table = gr.Dataframe(
                                headers=[
                                    "Model", "Agent Type", "Success Rate %", "Total Tests",
                                    "Avg Duration (ms)", "Cost ($)", "CO2 (g)",
                                ],
                                label="Evaluation Runs",
                                interactive=False,
                            )
                        with gr.Column(scale=1):
                            leaderboard_insights = gr.Markdown(
                                "**MCP Analysis:**\n\nClick 'Load Leaderboard' to see AI-powered insights"
                            )

                # Tab 2: Cost Estimator
                with gr.Tab("💰 Cost Estimator"):
                    gr.Markdown("### Estimate Evaluation Costs")
                    gr.Markdown("Uses MCP server to calculate costs for different models and configurations")
                    with gr.Row():
                        model_input = gr.Textbox(
                            label="Model",
                            placeholder="openai/gpt-4 or meta-llama/Llama-3.1-8B",
                            value="openai/gpt-4",
                        )
                        agent_type_input = gr.Dropdown(
                            ["tool", "code", "both"],
                            label="Agent Type",
                            value="both",
                        )
                        num_tests_input = gr.Number(
                            label="Number of Tests",
                            value=100,
                        )
                    estimate_btn = gr.Button("💵 Estimate Cost", variant="primary")
                    cost_output = gr.Markdown("**Cost Estimate:**\n\nEnter details and click 'Estimate Cost'")

                # Tab 3: MCP Server Status
                with gr.Tab("🔧 MCP Status"):
                    gr.Markdown("### TraceMind MCP Server Connection")
                    mcp_url_display = gr.Textbox(
                        label="MCP Server URL",
                        value=os.getenv(
                            'MCP_SERVER_URL',
                            'https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/',
                        ),
                        interactive=False,
                    )
                    test_mcp_btn = gr.Button("🧪 Test MCP Connection", variant="secondary")
                    mcp_status = gr.Markdown("**Status:** Not tested yet")

        # Event handlers (registered inside the Blocks context).
        def handle_login(token, profile):
            """Refresh the user banner and reveal the main content column."""
            user = get_user_info(token, profile)
            return create_user_info_display(user), gr.update(visible=True)

        # NOTE(review): listing login_btn twice as inputs passes the button's
        # own value for both `token` and `profile`. Gradio documents OAuth
        # injection via parameters annotated gr.OAuthToken / gr.OAuthProfile,
        # not via `inputs` - confirm that create_login_button() and
        # utils.auth are designed for this wiring.
        login_btn.click(
            fn=handle_login,
            inputs=[login_btn, login_btn],
            outputs=[user_display, main_content],
        )
        load_leaderboard_btn.click(
            fn=load_leaderboard_view,
            inputs=[login_btn, login_btn],
            outputs=[leaderboard_table, leaderboard_insights],
        )
        estimate_btn.click(
            fn=estimate_evaluation_cost,
            inputs=[model_input, agent_type_input, num_tests_input],
            outputs=[cost_output],
        )

        def test_mcp_connection():
            """Call mcp_client.initialize() and report the outcome as Markdown."""
            try:
                mcp_client.initialize()
                return "✅ **Connected Successfully!**\n\nMCP server is online and ready"
            except Exception as e:
                return f"❌ **Connection Failed**\n\nError: {e}"

        test_mcp_btn.click(
            fn=test_mcp_connection,
            outputs=[mcp_status],
        )

    return demo
if __name__ == "__main__":
    # Startup banner: echo the effective configuration, falling back to the
    # same defaults the rest of the app uses when the variables are unset.
    print("🚀 Starting TraceMind-AI...")
    print(f"📊 Leaderboard: {os.getenv('LEADERBOARD_REPO', 'kshitijthakkar/smoltrace-leaderboard')}")
    print(f"🤖 MCP Server: {os.getenv('MCP_SERVER_URL', 'https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/')}")
    print(f"🛠️ Dev Mode: {DEV_MODE}")

    demo = build_ui()
    # Listen on all interfaces on port 7860 without creating a public
    # share link.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )
|