Spaces: Running on Zero
Fix Gemini MCP error handling and session management
Browse files
- Improve error logging in call_agent() with full tracebacks and detailed error messages
- Add session health check with timeout to detect and recover from invalid sessions
- Automatically invalidate and recreate sessions on errors to force retry
- Enhance empty response handling in reasoning.py and supervisor.py with proper fallback
- Add timeout protection (2s) to health checks to prevent hanging
- Improve session initialization error messages with specific failure causes
- Better distinguish between different MCP failure modes for easier debugging
This fixes issues where MCP sessions would fail silently and fall back to default behavior without proper error reporting. The app now provides better diagnostics while maintaining graceful fallback functionality.
- client.py +48 -8
- reasoning.py +16 -2
- supervisor.py +15 -1
|
@@ -42,8 +42,32 @@ async def get_mcp_session():
|
|
| 42 |
logger.warning("MCP not available - SDK not installed")
|
| 43 |
return None
|
| 44 |
|
|
|
|
| 45 |
if config.global_mcp_session is not None:
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
try:
|
| 49 |
mcp_env = os.environ.copy()
|
|
@@ -96,16 +120,17 @@ async def get_mcp_session():
|
|
| 96 |
logger.error(f" - Invalid GEMINI_API_KEY")
|
| 97 |
logger.error(f" - agent.py server not starting correctly")
|
| 98 |
logger.error(f" - Network/firewall issues")
|
|
|
|
| 99 |
import traceback
|
| 100 |
-
logger.
|
| 101 |
try:
|
| 102 |
await session.__aexit__(None, None, None)
|
| 103 |
-
except Exception:
|
| 104 |
-
|
| 105 |
try:
|
| 106 |
await stdio_ctx.__aexit__(None, None, None)
|
| 107 |
-
except Exception:
|
| 108 |
-
|
| 109 |
return None
|
| 110 |
|
| 111 |
config.global_mcp_session = session
|
|
@@ -199,13 +224,18 @@ async def call_agent(user_prompt: str, system_prompt: str = None, files: list =
|
|
| 199 |
logger.warning("Failed to get MCP session for Gemini call - check GEMINI_API_KEY and agent.py")
|
| 200 |
# Invalidate session to force retry on next call
|
| 201 |
config.global_mcp_session = None
|
|
|
|
| 202 |
return ""
|
| 203 |
|
| 204 |
tools = await get_cached_mcp_tools()
|
| 205 |
if not tools:
|
|
|
|
| 206 |
tools = await get_cached_mcp_tools(force_refresh=True)
|
| 207 |
if not tools:
|
| 208 |
logger.error("Unable to obtain MCP tool catalog for Gemini calls")
|
|
|
|
|
|
|
|
|
|
| 209 |
return ""
|
| 210 |
|
| 211 |
generate_tool = None
|
|
@@ -232,16 +262,26 @@ async def call_agent(user_prompt: str, system_prompt: str = None, files: list =
|
|
| 232 |
if temperature is not None:
|
| 233 |
arguments["temperature"] = temperature
|
| 234 |
|
|
|
|
| 235 |
result = await session.call_tool(generate_tool.name, arguments=arguments)
|
| 236 |
|
| 237 |
if hasattr(result, 'content') and result.content:
|
| 238 |
for item in result.content:
|
| 239 |
if hasattr(item, 'text'):
|
| 240 |
response_text = item.text.strip()
|
| 241 |
-
|
|
|
|
|
|
|
| 242 |
logger.warning("⚠️ Gemini MCP returned empty or invalid result")
|
| 243 |
return ""
|
| 244 |
except Exception as e:
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
return ""
|
| 247 |
|
|
|
|
| 42 |
logger.warning("MCP not available - SDK not installed")
|
| 43 |
return None
|
| 44 |
|
| 45 |
+
# Check if existing session is still valid
|
| 46 |
if config.global_mcp_session is not None:
|
| 47 |
+
try:
|
| 48 |
+
# Try to list tools as a quick health check (with timeout)
|
| 49 |
+
await asyncio.wait_for(config.global_mcp_session.list_tools(), timeout=2.0)
|
| 50 |
+
return config.global_mcp_session
|
| 51 |
+
except asyncio.TimeoutError:
|
| 52 |
+
logger.warning("MCP session health check timed out, recreating session")
|
| 53 |
+
# Clean up invalid session
|
| 54 |
+
try:
|
| 55 |
+
if config.global_mcp_stdio_ctx is not None:
|
| 56 |
+
await config.global_mcp_stdio_ctx.__aexit__(None, None, None)
|
| 57 |
+
except Exception:
|
| 58 |
+
pass
|
| 59 |
+
config.global_mcp_session = None
|
| 60 |
+
config.global_mcp_stdio_ctx = None
|
| 61 |
+
except Exception as e:
|
| 62 |
+
logger.warning(f"Existing MCP session appears invalid, recreating: {type(e).__name__}: {e}")
|
| 63 |
+
# Clean up invalid session
|
| 64 |
+
try:
|
| 65 |
+
if config.global_mcp_stdio_ctx is not None:
|
| 66 |
+
await config.global_mcp_stdio_ctx.__aexit__(None, None, None)
|
| 67 |
+
except Exception:
|
| 68 |
+
pass
|
| 69 |
+
config.global_mcp_session = None
|
| 70 |
+
config.global_mcp_stdio_ctx = None
|
| 71 |
|
| 72 |
try:
|
| 73 |
mcp_env = os.environ.copy()
|
|
|
|
| 120 |
logger.error(f" - Invalid GEMINI_API_KEY")
|
| 121 |
logger.error(f" - agent.py server not starting correctly")
|
| 122 |
logger.error(f" - Network/firewall issues")
|
| 123 |
+
logger.error(f" - MCP server process crashed or timed out")
|
| 124 |
import traceback
|
| 125 |
+
logger.error(f" Full traceback: {traceback.format_exc()}")
|
| 126 |
try:
|
| 127 |
await session.__aexit__(None, None, None)
|
| 128 |
+
except Exception as cleanup_error:
|
| 129 |
+
logger.debug(f"Session cleanup error (ignored): {cleanup_error}")
|
| 130 |
try:
|
| 131 |
await stdio_ctx.__aexit__(None, None, None)
|
| 132 |
+
except Exception as cleanup_error:
|
| 133 |
+
logger.debug(f"Stdio context cleanup error (ignored): {cleanup_error}")
|
| 134 |
return None
|
| 135 |
|
| 136 |
config.global_mcp_session = session
|
|
|
|
| 224 |
logger.warning("Failed to get MCP session for Gemini call - check GEMINI_API_KEY and agent.py")
|
| 225 |
# Invalidate session to force retry on next call
|
| 226 |
config.global_mcp_session = None
|
| 227 |
+
config.global_mcp_stdio_ctx = None
|
| 228 |
return ""
|
| 229 |
|
| 230 |
tools = await get_cached_mcp_tools()
|
| 231 |
if not tools:
|
| 232 |
+
logger.info("MCP tools cache empty, refreshing...")
|
| 233 |
tools = await get_cached_mcp_tools(force_refresh=True)
|
| 234 |
if not tools:
|
| 235 |
logger.error("Unable to obtain MCP tool catalog for Gemini calls")
|
| 236 |
+
# Invalidate session to force retry on next call
|
| 237 |
+
config.global_mcp_session = None
|
| 238 |
+
config.global_mcp_stdio_ctx = None
|
| 239 |
return ""
|
| 240 |
|
| 241 |
generate_tool = None
|
|
|
|
| 262 |
if temperature is not None:
|
| 263 |
arguments["temperature"] = temperature
|
| 264 |
|
| 265 |
+
logger.debug(f"Calling MCP tool {generate_tool.name} with arguments: {list(arguments.keys())}")
|
| 266 |
result = await session.call_tool(generate_tool.name, arguments=arguments)
|
| 267 |
|
| 268 |
if hasattr(result, 'content') and result.content:
|
| 269 |
for item in result.content:
|
| 270 |
if hasattr(item, 'text'):
|
| 271 |
response_text = item.text.strip()
|
| 272 |
+
if response_text:
|
| 273 |
+
logger.debug(f"Gemini MCP returned {len(response_text)} chars")
|
| 274 |
+
return response_text
|
| 275 |
logger.warning("⚠️ Gemini MCP returned empty or invalid result")
|
| 276 |
return ""
|
| 277 |
except Exception as e:
|
| 278 |
+
error_type = type(e).__name__
|
| 279 |
+
error_msg = str(e)
|
| 280 |
+
logger.error(f"Gemini MCP call error: {error_type}: {error_msg}")
|
| 281 |
+
import traceback
|
| 282 |
+
logger.debug(f"Full traceback: {traceback.format_exc()}")
|
| 283 |
+
# Invalidate session on error to force retry
|
| 284 |
+
config.global_mcp_session = None
|
| 285 |
+
config.global_mcp_stdio_ctx = None
|
| 286 |
return ""
|
| 287 |
|
|
@@ -41,14 +41,28 @@ Respond in JSON format:
|
|
| 41 |
temperature=0.3
|
| 42 |
)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
try:
|
| 45 |
json_start = response.find('{')
|
| 46 |
json_end = response.rfind('}') + 1
|
| 47 |
if json_start >= 0 and json_end > json_start:
|
| 48 |
reasoning = json.loads(response[json_start:json_end])
|
| 49 |
else:
|
| 50 |
-
raise ValueError("No JSON found")
|
| 51 |
-
except:
|
|
|
|
|
|
|
| 52 |
reasoning = {
|
| 53 |
"query_type": "general_info",
|
| 54 |
"complexity": "moderate",
|
|
|
|
| 41 |
temperature=0.3
|
| 42 |
)
|
| 43 |
|
| 44 |
+
if not response or not response.strip():
|
| 45 |
+
logger.warning("Gemini MCP returned empty response for reasoning, using fallback")
|
| 46 |
+
reasoning = {
|
| 47 |
+
"query_type": "general_info",
|
| 48 |
+
"complexity": "moderate",
|
| 49 |
+
"information_needs": ["medical information"],
|
| 50 |
+
"requires_rag": True,
|
| 51 |
+
"requires_web_search": False,
|
| 52 |
+
"sub_questions": [query]
|
| 53 |
+
}
|
| 54 |
+
return reasoning
|
| 55 |
+
|
| 56 |
try:
|
| 57 |
json_start = response.find('{')
|
| 58 |
json_end = response.rfind('}') + 1
|
| 59 |
if json_start >= 0 and json_end > json_start:
|
| 60 |
reasoning = json.loads(response[json_start:json_end])
|
| 61 |
else:
|
| 62 |
+
raise ValueError("No JSON found in response")
|
| 63 |
+
except Exception as e:
|
| 64 |
+
logger.error(f"Failed to parse reasoning response as JSON: {e}")
|
| 65 |
+
logger.debug(f"Response was: {response[:200]}...")
|
| 66 |
reasoning = {
|
| 67 |
"query_type": "general_info",
|
| 68 |
"complexity": "moderate",
|
|
@@ -72,6 +72,19 @@ Guidelines:
|
|
| 72 |
temperature=0.3
|
| 73 |
)
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
try:
|
| 76 |
json_start = response.find('{')
|
| 77 |
json_end = response.rfind('}') + 1
|
|
@@ -80,9 +93,10 @@ Guidelines:
|
|
| 80 |
logger.info(f"[GEMINI SUPERVISOR] Query broken into {len(breakdown.get('sub_topics', []))} sub-topics")
|
| 81 |
return breakdown
|
| 82 |
else:
|
| 83 |
-
raise ValueError("Supervisor JSON not found")
|
| 84 |
except Exception as exc:
|
| 85 |
logger.error(f"[GEMINI SUPERVISOR] Breakdown parsing failed: {exc}")
|
|
|
|
| 86 |
breakdown = {
|
| 87 |
"sub_topics": [
|
| 88 |
{"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},
|
|
|
|
| 72 |
temperature=0.3
|
| 73 |
)
|
| 74 |
|
| 75 |
+
if not response or not response.strip():
|
| 76 |
+
logger.warning("[GEMINI SUPERVISOR] Gemini MCP returned empty response for breakdown, using fallback")
|
| 77 |
+
breakdown = {
|
| 78 |
+
"sub_topics": [
|
| 79 |
+
{"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},
|
| 80 |
+
{"id": 2, "topic": "Clinical Details", "instruction": "Provide key clinical insights", "expected_tokens": 200, "priority": "medium", "approach": "clinical perspective"},
|
| 81 |
+
],
|
| 82 |
+
"strategy": "Sequential answer with key points",
|
| 83 |
+
"exploration_note": "Fallback breakdown - basic coverage"
|
| 84 |
+
}
|
| 85 |
+
logger.warning(f"[GEMINI SUPERVISOR] Using fallback breakdown")
|
| 86 |
+
return breakdown
|
| 87 |
+
|
| 88 |
try:
|
| 89 |
json_start = response.find('{')
|
| 90 |
json_end = response.rfind('}') + 1
|
|
|
|
| 93 |
logger.info(f"[GEMINI SUPERVISOR] Query broken into {len(breakdown.get('sub_topics', []))} sub-topics")
|
| 94 |
return breakdown
|
| 95 |
else:
|
| 96 |
+
raise ValueError("Supervisor JSON not found in response")
|
| 97 |
except Exception as exc:
|
| 98 |
logger.error(f"[GEMINI SUPERVISOR] Breakdown parsing failed: {exc}")
|
| 99 |
+
logger.debug(f"Response was: {response[:200]}...")
|
| 100 |
breakdown = {
|
| 101 |
"sub_topics": [
|
| 102 |
{"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},
|