Y Phung Nguyen committed on
Commit
4a43fcc
·
1 Parent(s): ffcfd50

Fix Gemini MCP error handling and session management

Browse files

- Improve error logging in call_agent() with full tracebacks and detailed error messages
- Add session health check with timeout to detect and recover from invalid sessions
- Automatically invalidate and recreate sessions on errors to force retry
- Enhance empty response handling in reasoning.py and supervisor.py with proper fallback
- Add timeout protection (2s) to health checks to prevent hanging
- Improve session initialization error messages with specific failure causes
- Better distinguish between different MCP failure modes for easier debugging

This fixes issues where MCP sessions would fail silently and fall back to default behavior without proper error reporting. The app now provides better diagnostics while maintaining graceful fallback functionality.

Files changed (3) hide show
  1. client.py +48 -8
  2. reasoning.py +16 -2
  3. supervisor.py +15 -1
client.py CHANGED
@@ -42,8 +42,32 @@ async def get_mcp_session():
42
  logger.warning("MCP not available - SDK not installed")
43
  return None
44
 
 
45
  if config.global_mcp_session is not None:
46
- return config.global_mcp_session
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  try:
49
  mcp_env = os.environ.copy()
@@ -96,16 +120,17 @@ async def get_mcp_session():
96
  logger.error(f" - Invalid GEMINI_API_KEY")
97
  logger.error(f" - agent.py server not starting correctly")
98
  logger.error(f" - Network/firewall issues")
 
99
  import traceback
100
- logger.debug(f" Full traceback: {traceback.format_exc()}")
101
  try:
102
  await session.__aexit__(None, None, None)
103
- except Exception:
104
- pass
105
  try:
106
  await stdio_ctx.__aexit__(None, None, None)
107
- except Exception:
108
- pass
109
  return None
110
 
111
  config.global_mcp_session = session
@@ -199,13 +224,18 @@ async def call_agent(user_prompt: str, system_prompt: str = None, files: list =
199
  logger.warning("Failed to get MCP session for Gemini call - check GEMINI_API_KEY and agent.py")
200
  # Invalidate session to force retry on next call
201
  config.global_mcp_session = None
 
202
  return ""
203
 
204
  tools = await get_cached_mcp_tools()
205
  if not tools:
 
206
  tools = await get_cached_mcp_tools(force_refresh=True)
207
  if not tools:
208
  logger.error("Unable to obtain MCP tool catalog for Gemini calls")
 
 
 
209
  return ""
210
 
211
  generate_tool = None
@@ -232,16 +262,26 @@ async def call_agent(user_prompt: str, system_prompt: str = None, files: list =
232
  if temperature is not None:
233
  arguments["temperature"] = temperature
234
 
 
235
  result = await session.call_tool(generate_tool.name, arguments=arguments)
236
 
237
  if hasattr(result, 'content') and result.content:
238
  for item in result.content:
239
  if hasattr(item, 'text'):
240
  response_text = item.text.strip()
241
- return response_text
 
 
242
  logger.warning("⚠️ Gemini MCP returned empty or invalid result")
243
  return ""
244
  except Exception as e:
245
- logger.error(f"Gemini MCP call error: {e}")
 
 
 
 
 
 
 
246
  return ""
247
 
 
42
  logger.warning("MCP not available - SDK not installed")
43
  return None
44
 
45
+ # Check if existing session is still valid
46
  if config.global_mcp_session is not None:
47
+ try:
48
+ # Try to list tools as a quick health check (with timeout)
49
+ await asyncio.wait_for(config.global_mcp_session.list_tools(), timeout=2.0)
50
+ return config.global_mcp_session
51
+ except asyncio.TimeoutError:
52
+ logger.warning("MCP session health check timed out, recreating session")
53
+ # Clean up invalid session
54
+ try:
55
+ if config.global_mcp_stdio_ctx is not None:
56
+ await config.global_mcp_stdio_ctx.__aexit__(None, None, None)
57
+ except Exception:
58
+ pass
59
+ config.global_mcp_session = None
60
+ config.global_mcp_stdio_ctx = None
61
+ except Exception as e:
62
+ logger.warning(f"Existing MCP session appears invalid, recreating: {type(e).__name__}: {e}")
63
+ # Clean up invalid session
64
+ try:
65
+ if config.global_mcp_stdio_ctx is not None:
66
+ await config.global_mcp_stdio_ctx.__aexit__(None, None, None)
67
+ except Exception:
68
+ pass
69
+ config.global_mcp_session = None
70
+ config.global_mcp_stdio_ctx = None
71
 
72
  try:
73
  mcp_env = os.environ.copy()
 
120
  logger.error(f" - Invalid GEMINI_API_KEY")
121
  logger.error(f" - agent.py server not starting correctly")
122
  logger.error(f" - Network/firewall issues")
123
+ logger.error(f" - MCP server process crashed or timed out")
124
  import traceback
125
+ logger.error(f" Full traceback: {traceback.format_exc()}")
126
  try:
127
  await session.__aexit__(None, None, None)
128
+ except Exception as cleanup_error:
129
+ logger.debug(f"Session cleanup error (ignored): {cleanup_error}")
130
  try:
131
  await stdio_ctx.__aexit__(None, None, None)
132
+ except Exception as cleanup_error:
133
+ logger.debug(f"Stdio context cleanup error (ignored): {cleanup_error}")
134
  return None
135
 
136
  config.global_mcp_session = session
 
224
  logger.warning("Failed to get MCP session for Gemini call - check GEMINI_API_KEY and agent.py")
225
  # Invalidate session to force retry on next call
226
  config.global_mcp_session = None
227
+ config.global_mcp_stdio_ctx = None
228
  return ""
229
 
230
  tools = await get_cached_mcp_tools()
231
  if not tools:
232
+ logger.info("MCP tools cache empty, refreshing...")
233
  tools = await get_cached_mcp_tools(force_refresh=True)
234
  if not tools:
235
  logger.error("Unable to obtain MCP tool catalog for Gemini calls")
236
+ # Invalidate session to force retry on next call
237
+ config.global_mcp_session = None
238
+ config.global_mcp_stdio_ctx = None
239
  return ""
240
 
241
  generate_tool = None
 
262
  if temperature is not None:
263
  arguments["temperature"] = temperature
264
 
265
+ logger.debug(f"Calling MCP tool {generate_tool.name} with arguments: {list(arguments.keys())}")
266
  result = await session.call_tool(generate_tool.name, arguments=arguments)
267
 
268
  if hasattr(result, 'content') and result.content:
269
  for item in result.content:
270
  if hasattr(item, 'text'):
271
  response_text = item.text.strip()
272
+ if response_text:
273
+ logger.debug(f"Gemini MCP returned {len(response_text)} chars")
274
+ return response_text
275
  logger.warning("⚠️ Gemini MCP returned empty or invalid result")
276
  return ""
277
  except Exception as e:
278
+ error_type = type(e).__name__
279
+ error_msg = str(e)
280
+ logger.error(f"Gemini MCP call error: {error_type}: {error_msg}")
281
+ import traceback
282
+ logger.debug(f"Full traceback: {traceback.format_exc()}")
283
+ # Invalidate session on error to force retry
284
+ config.global_mcp_session = None
285
+ config.global_mcp_stdio_ctx = None
286
  return ""
287
 
reasoning.py CHANGED
@@ -41,14 +41,28 @@ Respond in JSON format:
41
  temperature=0.3
42
  )
43
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  try:
45
  json_start = response.find('{')
46
  json_end = response.rfind('}') + 1
47
  if json_start >= 0 and json_end > json_start:
48
  reasoning = json.loads(response[json_start:json_end])
49
  else:
50
- raise ValueError("No JSON found")
51
- except:
 
 
52
  reasoning = {
53
  "query_type": "general_info",
54
  "complexity": "moderate",
 
41
  temperature=0.3
42
  )
43
 
44
+ if not response or not response.strip():
45
+ logger.warning("Gemini MCP returned empty response for reasoning, using fallback")
46
+ reasoning = {
47
+ "query_type": "general_info",
48
+ "complexity": "moderate",
49
+ "information_needs": ["medical information"],
50
+ "requires_rag": True,
51
+ "requires_web_search": False,
52
+ "sub_questions": [query]
53
+ }
54
+ return reasoning
55
+
56
  try:
57
  json_start = response.find('{')
58
  json_end = response.rfind('}') + 1
59
  if json_start >= 0 and json_end > json_start:
60
  reasoning = json.loads(response[json_start:json_end])
61
  else:
62
+ raise ValueError("No JSON found in response")
63
+ except Exception as e:
64
+ logger.error(f"Failed to parse reasoning response as JSON: {e}")
65
+ logger.debug(f"Response was: {response[:200]}...")
66
  reasoning = {
67
  "query_type": "general_info",
68
  "complexity": "moderate",
supervisor.py CHANGED
@@ -72,6 +72,19 @@ Guidelines:
72
  temperature=0.3
73
  )
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  try:
76
  json_start = response.find('{')
77
  json_end = response.rfind('}') + 1
@@ -80,9 +93,10 @@ Guidelines:
80
  logger.info(f"[GEMINI SUPERVISOR] Query broken into {len(breakdown.get('sub_topics', []))} sub-topics")
81
  return breakdown
82
  else:
83
- raise ValueError("Supervisor JSON not found")
84
  except Exception as exc:
85
  logger.error(f"[GEMINI SUPERVISOR] Breakdown parsing failed: {exc}")
 
86
  breakdown = {
87
  "sub_topics": [
88
  {"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},
 
72
  temperature=0.3
73
  )
74
 
75
+ if not response or not response.strip():
76
+ logger.warning("[GEMINI SUPERVISOR] Gemini MCP returned empty response for breakdown, using fallback")
77
+ breakdown = {
78
+ "sub_topics": [
79
+ {"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},
80
+ {"id": 2, "topic": "Clinical Details", "instruction": "Provide key clinical insights", "expected_tokens": 200, "priority": "medium", "approach": "clinical perspective"},
81
+ ],
82
+ "strategy": "Sequential answer with key points",
83
+ "exploration_note": "Fallback breakdown - basic coverage"
84
+ }
85
+ logger.warning(f"[GEMINI SUPERVISOR] Using fallback breakdown")
86
+ return breakdown
87
+
88
  try:
89
  json_start = response.find('{')
90
  json_end = response.rfind('}') + 1
 
93
  logger.info(f"[GEMINI SUPERVISOR] Query broken into {len(breakdown.get('sub_topics', []))} sub-topics")
94
  return breakdown
95
  else:
96
+ raise ValueError("Supervisor JSON not found in response")
97
  except Exception as exc:
98
  logger.error(f"[GEMINI SUPERVISOR] Breakdown parsing failed: {exc}")
99
+ logger.debug(f"Response was: {response[:200]}...")
100
  breakdown = {
101
  "sub_topics": [
102
  {"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},