Spaces: Running on Zero

Y Phung Nguyen committed · dd13e35
Parent(s): f7415cc

Reduce supervisor processing latency

Browse files:
- client.py +120 -99
- pipeline.py +58 -48
client.py
CHANGED

@@ -196,7 +196,9 @@ async def test_mcp_connection() -> bool:
 
 
 async def call_agent_direct_api(user_prompt: str, system_prompt: str = None, files: list = None, model: str = None, temperature: float = 0.2) -> str:
-    """Call Gemini API directly without MCP"""
+    """Call Gemini API directly without MCP
+    Includes retry logic with exponential backoff to handle GPU task aborted errors
+    """
     if not GEMINI_DIRECT_AVAILABLE:
         logger.error("❌ google-genai not installed - cannot use direct API")
         return ""

@@ -205,106 +207,125 @@ async def call_agent_direct_api(user_prompt: str, system_prompt: str = None, fil
         logger.warning("GEMINI_API_KEY not set - cannot use Gemini API")
         return ""
 
-    ... (98 removed lines of the previous implementation; content not preserved in this capture)
+    max_retries = 3
+    base_delay = 1.0  # Base delay in seconds
+
+    for attempt in range(max_retries):
+        try:
+            gemini_client = genai.Client(api_key=config.GEMINI_API_KEY)
+            model_name = model or config.GEMINI_MODEL
+            temp = temperature if temperature is not None else 0.2
+
+            # Prepare content
+            contents = user_prompt
+            if system_prompt:
+                contents = f"{system_prompt}\n\n{user_prompt}"
+
+            gemini_contents = [contents]
+
+            # Handle files if provided
+            if files:
+                for file_obj in files:
+                    try:
+                        if "path" in file_obj:
+                            file_path = file_obj["path"]
+                            mime_type = file_obj.get("type")
+
+                            if not os.path.exists(file_path):
+                                logger.warning(f"File not found: {file_path}")
+                                continue
+
+                            with open(file_path, 'rb') as f:
+                                file_data = f.read()
+
                             if not mime_type:
+                                from mimetypes import guess_type
+                                mime_type, _ = guess_type(file_path)
+                                if not mime_type:
+                                    mime_type = "application/octet-stream"
+
+                            gemini_contents.append({
+                                "inline_data": {
+                                    "mime_type": mime_type,
+                                    "data": base64.b64encode(file_data).decode('utf-8')
+                                }
+                            })
+                        elif "content" in file_obj:
+                            file_data = base64.b64decode(file_obj["content"])
+                            mime_type = file_obj.get("type", "application/octet-stream")
+                            gemini_contents.append({
+                                "inline_data": {
+                                    "mime_type": mime_type,
+                                    "data": file_obj["content"]
+                                }
+                            })
+                    except Exception as e:
+                        logger.warning(f"Error processing file: {e}")
+                        continue
+
+            generation_config = {
+                "temperature": temp,
+                "max_output_tokens": int(os.environ.get("GEMINI_MAX_OUTPUT_TOKENS", "8192"))
+            }
+
+            logger.info(f"🔵 Calling Gemini API directly with model={model_name}, temperature={temp}")
+
+            def generate_sync():
+                return gemini_client.models.generate_content(
+                    model=model_name,
+                    contents=gemini_contents,
+                    config=generation_config,
+                )
+
+            timeout_seconds = min(int(os.environ.get("GEMINI_TIMEOUT", "300000")) / 1000.0, 20.0)
+            response = await asyncio.wait_for(
+                asyncio.to_thread(generate_sync),
+                timeout=timeout_seconds
             )
+
+            logger.info(f"✅ Gemini API call completed successfully")
+
+            # Extract text from response
+            if response and hasattr(response, 'text') and response.text:
+                return response.text.strip()
+            elif response and hasattr(response, 'candidates') and response.candidates:
+                text_parts = []
+                for candidate in response.candidates:
+                    if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts'):
+                        for part in candidate.content.parts:
+                            if hasattr(part, 'text'):
+                                text_parts.append(part.text)
+                if text_parts:
+                    return ''.join(text_parts).strip()
+
+            logger.warning("⚠️ Gemini API returned empty response")
+            return ""
+
+        except asyncio.TimeoutError:
+            if attempt < max_retries - 1:
+                delay = base_delay * (2 ** attempt)  # Exponential backoff: 1s, 2s, 4s
+                logger.warning(f"⏳ Gemini API call timed out (attempt {attempt + 1}/{max_retries}), retrying after {delay}s...")
+                await asyncio.sleep(delay)
+                continue
+            else:
+                logger.error(f"❌ Gemini API call timed out after {max_retries} attempts")
+                return ""
+        except Exception as e:
+            error_type = type(e).__name__
+            error_msg = str(e).lower()
+            is_gpu_error = 'gpu task aborted' in error_msg or ('gpu' in error_msg and 'abort' in error_msg)
+
+            if is_gpu_error and attempt < max_retries - 1:
+                delay = base_delay * (2 ** attempt)  # Exponential backoff: 1s, 2s, 4s
+                logger.warning(f"⏳ Gemini API GPU task aborted (attempt {attempt + 1}/{max_retries}), retrying after {delay}s...")
+                await asyncio.sleep(delay)
+                continue
+            else:
+                logger.error(f"❌ Gemini API call error after {attempt + 1} attempts: {error_type}: {str(e)}")
+                if attempt == max_retries - 1:
+                    import traceback
+                    logger.error(f"Full traceback: {traceback.format_exc()}")
+                return ""
 
 
 async def call_agent(user_prompt: str, system_prompt: str = None, files: list = None, model: str = None, temperature: float = 0.2) -> str:
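The new retry loop also bounds worst-case latency: each attempt is capped at min(GEMINI_TIMEOUT / 1000, 20.0) seconds, so three attempts plus backoff delays of 1 s and 2 s come to roughly 3 × 20 + 1 + 2 = 63 s, whereas the GEMINI_TIMEOUT default of 300000 ms would otherwise permit a single 300 s wait. Below is a minimal caller sketch, assuming this module is importable as client and GEMINI_API_KEY is configured; the prompt strings are purely illustrative:

import asyncio

from client import call_agent_direct_api  # assumes client.py is on the import path

async def main():
    # call_agent_direct_api returns "" when all retries fail,
    # so callers can detect failure without catching exceptions.
    answer = await call_agent_direct_api(
        user_prompt="Summarize the key risk factors for sepsis.",
        system_prompt="You are a concise medical assistant.",
        temperature=0.2,
    )
    print(answer or "(empty response after retries)")

if __name__ == "__main__":
    asyncio.run(main())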
pipeline.py
CHANGED

@@ -47,54 +47,64 @@ def run_gemini_in_thread(fn, *args, **kwargs):
     except concurrent.futures.TimeoutError:
         logger.error(f"[GEMINI SUPERVISOR] Function {fn.__name__} timed out after 30s")
         # Return fallback based on function
-        ... (48 removed lines of the previous inline fallback logic; content not preserved in this capture)
+        return _supervisor_logics(fn.__name__, args)
+    except Exception as e:
+        logger.error(f"[GEMINI SUPERVISOR] Function {fn.__name__} failed with error: {type(e).__name__}: {str(e)}")
+        # Return fallback based on function
+        return _supervisor_logics(fn.__name__, args)
+
+
+def _supervisor_logics(fn_name: str, args: tuple):
+    """Get appropriate fallback value based on function name"""
+    if "breakdown" in fn_name:
+        return {
+            "sub_topics": [
+                {"id": 1, "topic": "Answer", "instruction": args[0] if args else "Address the question", "expected_tokens": 400, "priority": "high", "approach": "direct answer"}
+            ],
+            "strategy": "Direct answer (fallback)",
+            "exploration_note": "Gemini supervisor error"
+        }
+    elif "search_strategies" in fn_name:
+        return {
+            "search_strategies": [
+                {"id": 1, "strategy": args[0] if args else "", "target_sources": 2, "focus": "main query"}
+            ],
+            "max_strategies": 1
+        }
+    elif "rag_brainstorm" in fn_name:
+        return {
+            "contexts": [
+                {"id": 1, "context": args[1][:500] if len(args) > 1 else "", "focus": "retrieved information", "relevance": "high"}
+            ],
+            "max_contexts": 1
+        }
+    elif "synthesize" in fn_name:
+        # Return concatenated MedSwin answers as fallback
+        return "\n\n".join(args[1] if len(args) > 1 and args[1] else [])
+    elif "challenge" in fn_name:
+        return {"is_optimal": True, "completeness_score": 7, "accuracy_score": 7, "clarity_score": 7, "missing_aspects": [], "inaccuracies": [], "improvement_suggestions": [], "needs_more_context": False, "enhancement_instructions": ""}
+    elif "enhance_answer" in fn_name:
+        return args[1] if len(args) > 1 else ""
+    elif "check_clarity" in fn_name:
+        return {"is_unclear": False, "needs_search": False, "search_queries": []}
+    elif "clinical_intake_triage" in fn_name:
+        return {
+            "needs_additional_info": False,
+            "decision_reason": "Error fallback",
+            "max_rounds": args[2] if len(args) > 2 else 5,
+            "questions": [],
+            "initial_hypotheses": []
+        }
+    elif "summarize_clinical_insights" in fn_name:
+        return {
+            "patient_profile": "",
+            "refined_problem_statement": args[0] if args else "",
+            "key_findings": [],
+            "handoff_note": "Proceed with regular workflow."
+        }
+    else:
+        logger.warning(f"[GEMINI SUPERVISOR] Unknown function {fn_name}, returning None")
+        return None
     except Exception as e:
         logger.error(f"[GEMINI SUPERVISOR] Error running {fn.__name__} in thread: {e}")
         # Return appropriate fallback
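For reference, a minimal sketch of how the extracted fallback helper dispatches on a supervisor function's name; the name gemini_breakdown_topics is hypothetical, since any name containing "breakdown" takes the same branch:

from pipeline import _supervisor_logics  # assumes pipeline.py is importable

# Simulate a timed-out "breakdown" supervisor call; args[0] is the user query.
fallback = _supervisor_logics("gemini_breakdown_topics", ("What causes anemia?",))
assert fallback["strategy"] == "Direct answer (fallback)"
assert fallback["sub_topics"][0]["instruction"] == "What causes anemia?"

# Names matching no branch fall through to None (with a warning logged).
assert _supervisor_logics("unrecognized_fn", ()) is None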