File size: 8,152 Bytes
fae4e5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
"""
TraceMind-AI - Agent Evaluation Platform
MCP Client consuming TraceMind-mcp-server for intelligent analysis
"""

import os
import gradio as gr
from dotenv import load_dotenv
import pandas as pd

# Load environment variables
load_dotenv()

# Import utilities
from utils.auth import is_authenticated, get_user_info, create_login_button, create_user_info_display, DEV_MODE
from utils.navigation import Navigator, Screen
from data_loader import create_data_loader_from_env
from styles.tracemind_theme import get_tracemind_css
from mcp_client.sync_wrapper import get_sync_mcp_client

# Initialize
# Module-level singletons, created once at import time and shared by the
# handlers defined below.
data_loader = create_data_loader_from_env()  # read by load_leaderboard_view
navigator = Navigator()  # not referenced by any code visible in this file
mcp_client = get_sync_mcp_client()  # used by the leaderboard, cost and status handlers

# Global state
# NOTE(review): never read or written by the code visible in this file --
# presumably reserved for a run-detail view; confirm before removing.
current_selected_run = None


def load_leaderboard_view(token, profile):
    """Load the leaderboard table together with MCP-generated insights.

    Args:
        token: Authentication token, forwarded to ``is_authenticated``.
        profile: User profile, forwarded to ``is_authenticated``.

    Returns:
        A ``(table, insights)`` pair matching the two outputs this handler is
        wired to (a ``gr.Dataframe`` and a ``gr.Markdown``). ``table`` is a
        ``pandas.DataFrame`` with the rounded display columns on success, and
        ``None`` (an empty table) when the user is not authenticated, the
        leaderboard has no rows, or loading fails. In those cases ``insights``
        carries the explanatory message.
    """
    display_columns = [
        'model', 'agent_type', 'success_rate', 'total_tests',
        'avg_duration_ms', 'total_cost_usd', 'co2_emissions_g',
    ]
    # Decimal places kept per numeric column in the displayed table.
    rounding = {
        'success_rate': 1,
        'avg_duration_ms': 0,
        'total_cost_usd': 4,
        'co2_emissions_g': 2,
    }

    # Status messages are routed to the Markdown output: a bare string is not
    # valid table data for the Dataframe component, so it must never be
    # returned in the first slot.
    if not is_authenticated(token, profile):
        return None, "Please log in to view the leaderboard"

    try:
        # Load real data from HuggingFace
        leaderboard_df = data_loader.load_leaderboard()

        if leaderboard_df.empty:
            return None, "No evaluation runs found in the leaderboard"

        # Work on a copy so rounding never mutates the loader's frame.
        display_df = leaderboard_df[display_columns].copy()
        for column, decimals in rounding.items():
            display_df[column] = display_df[column].round(decimals)

        # MCP insights are best-effort: a failing analysis call must not hide
        # the table that was loaded successfully.
        try:
            insights = mcp_client.analyze_leaderboard(
                metric_focus="overall",
                time_range="all_time",
                top_n=5,
                hf_token=os.getenv('HF_TOKEN'),
                gemini_api_key=os.getenv('GEMINI_API_KEY')
            )
        except Exception as e:
            insights = f"⚠️ MCP analysis unavailable: {e}\n\n(Server may need initialization)"

        return display_df, insights

    except Exception as e:
        # UI boundary: report the failure in the insights panel instead of
        # letting the exception reach Gradio.
        return None, f"Error loading leaderboard: {e}"


def estimate_evaluation_cost(model, agent_type, num_tests):
    """Estimate the cost of a new evaluation run using the MCP server.

    Inputs are validated before the MCP server is contacted, so obviously
    invalid requests produce a clear message instead of a raw conversion
    error or a wasted remote call.

    Args:
        model: Model identifier passed to the MCP server; must not be blank.
        agent_type: Agent type passed to the MCP server unchanged.
        num_tests: Number of tests. Converted with ``int`` (so a float coming
            from a numeric input is accepted) and required to be positive.

    Returns:
        The value returned by ``mcp_client.estimate_cost`` on success, or an
        error message string beginning with "❌ Error estimating cost:" when
        validation fails or the MCP call raises.
    """
    if not str(model or "").strip():
        return "❌ Error estimating cost: model name is required"

    try:
        test_count = int(num_tests)
    except (TypeError, ValueError, OverflowError):
        # None (cleared field), non-numeric text, NaN and infinity land here.
        return "❌ Error estimating cost: number of tests must be a whole number"

    if test_count <= 0:
        return "❌ Error estimating cost: number of tests must be greater than zero"

    try:
        return mcp_client.estimate_cost(
            model=model,
            agent_type=agent_type,
            num_tests=test_count,
            hf_token=os.getenv('HF_TOKEN'),
            gemini_api_key=os.getenv('GEMINI_API_KEY')
        )
    except Exception as e:
        # UI boundary: surface the failure as text in the output panel.
        return f"❌ Error estimating cost: {e}"


def build_ui():
    """Build the Gradio UI.

    Lays out a header, an authentication row, and a main column holding three
    tabs (leaderboard, cost estimator, MCP status), then wires the button
    click handlers. The main column starts visible only when ``DEV_MODE`` is
    truthy and is revealed by the login handler.

    Returns:
        The constructed ``gr.Blocks`` app; it is not launched here.
    """

    with gr.Blocks(css=get_tracemind_css(), title="TraceMind-AI") as demo:
        # Header
        gr.Markdown("""
        # 🔍 TraceMind-AI
        ### Agent Evaluation Platform with MCP-Powered Intelligence

        **Powered by:**
        - 📊 Real data from HuggingFace datasets
        - 🤖 MCP Server for AI-powered insights ([TraceMind-mcp-server](https://huggingface.co/spaces/kshitijthakkar/TraceMind-mcp-server))
        - 🧠 Google Gemini 2.5 Flash for analysis
        """)

        # Authentication
        with gr.Row():
            with gr.Column(scale=2):
                user_display = gr.HTML(create_user_info_display(None))
            with gr.Column(scale=1):
                login_btn = create_login_button()

        # Main content (shown when authenticated)
        with gr.Column(visible=DEV_MODE) as main_content:
            with gr.Tabs():
                # Tab 1: Leaderboard
                with gr.Tab("📊 Leaderboard"):
                    gr.Markdown("### Agent Evaluation Leaderboard")
                    gr.Markdown("Real-time data from `kshitijthakkar/smoltrace-leaderboard`")

                    load_leaderboard_btn = gr.Button("🔄 Load Leaderboard", variant="primary")

                    with gr.Row():
                        with gr.Column(scale=2):
                            leaderboard_table = gr.Dataframe(
                                headers=["Model", "Agent Type", "Success Rate %", "Total Tests", "Avg Duration (ms)", "Cost ($)", "CO2 (g)"],
                                label="Evaluation Runs",
                                interactive=False
                            )
                        with gr.Column(scale=1):
                            leaderboard_insights = gr.Markdown("**MCP Analysis:**\n\nClick 'Load Leaderboard' to see AI-powered insights")

                # Tab 2: Cost Estimator
                with gr.Tab("💰 Cost Estimator"):
                    gr.Markdown("### Estimate Evaluation Costs")
                    gr.Markdown("Uses MCP server to calculate costs for different models and configurations")

                    with gr.Row():
                        model_input = gr.Textbox(
                            label="Model",
                            placeholder="openai/gpt-4 or meta-llama/Llama-3.1-8B",
                            value="openai/gpt-4"
                        )
                        agent_type_input = gr.Dropdown(
                            ["tool", "code", "both"],
                            label="Agent Type",
                            value="both"
                        )
                        num_tests_input = gr.Number(
                            label="Number of Tests",
                            value=100
                        )

                    estimate_btn = gr.Button("💵 Estimate Cost", variant="primary")
                    cost_output = gr.Markdown("**Cost Estimate:**\n\nEnter details and click 'Estimate Cost'")

                # Tab 3: MCP Server Status
                with gr.Tab("🔧 MCP Status"):
                    gr.Markdown("### TraceMind MCP Server Connection")

                    # Read-only display of the configured server URL; nothing
                    # reads it back, so the component is not bound to a name.
                    gr.Textbox(
                        label="MCP Server URL",
                        value=os.getenv('MCP_SERVER_URL', 'https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/'),
                        interactive=False
                    )

                    test_mcp_btn = gr.Button("🧪 Test MCP Connection", variant="secondary")
                    mcp_status = gr.Markdown("**Status:** Not tested yet")

        # Event handlers
        def handle_login(token, profile):
            """Refresh the user banner and reveal the main content column."""
            user = get_user_info(token, profile)
            return create_user_info_display(user), gr.update(visible=True)

        # NOTE(review): Gradio's OAuth docs describe token/profile injection via
        # handler parameters annotated with gr.OAuthToken / gr.OAuthProfile, not
        # via `inputs`. Listing login_btn here likely passes the button's own
        # value twice -- confirm against utils.auth before relying on it.
        login_btn.click(
            fn=handle_login,
            inputs=[login_btn, login_btn],  # Gradio provides token/profile automatically
            outputs=[user_display, main_content]
        )

        load_leaderboard_btn.click(
            fn=load_leaderboard_view,
            inputs=[login_btn, login_btn],
            outputs=[leaderboard_table, leaderboard_insights]
        )

        estimate_btn.click(
            fn=estimate_evaluation_cost,
            inputs=[model_input, agent_type_input, num_tests_input],
            outputs=[cost_output]
        )

        def test_mcp_connection():
            """Initialize the MCP client and report the outcome as Markdown."""
            try:
                mcp_client.initialize()
                return "✅ **Connected Successfully!**\n\nMCP server is online and ready"
            except Exception as e:
                return f"❌ **Connection Failed**\n\nError: {e}"

        test_mcp_btn.click(
            fn=test_mcp_connection,
            outputs=[mcp_status]
        )

    return demo


if __name__ == "__main__":
    # Startup banner: echo the effective configuration (environment value or
    # the built-in default) so it is visible in the process log.
    print("🚀 Starting TraceMind-AI...")
    print(f"📊 Leaderboard: {os.getenv('LEADERBOARD_REPO', 'kshitijthakkar/smoltrace-leaderboard')}")
    print(f"🤖 MCP Server: {os.getenv('MCP_SERVER_URL', 'https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/')}")
    print(f"🛠️  Dev Mode: {DEV_MODE}")

    demo = build_ui()
    demo.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        share=False  # no public share link
    )