Y Phung Nguyen committed
Commit 1fc52ea · 1 Parent(s): 7a7ea02

Run Gemini in thread to avoid timeout
Files changed (9)
  1. agent.py +1 -1
  2. models.py +6 -7
  3. pipeline.py +106 -13
  4. reasoning.py +0 -4
  5. requirements.txt +1 -0
  6. search.py +1 -5
  7. supervisor.py +0 -20
  8. utils.py +1 -4
  9. voice.py +0 -4
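The substantive change is the thread-offload pattern introduced in pipeline.py below: each blocking Gemini supervisor call is submitted to a small ThreadPoolExecutor and awaited with a hard 30 s cap, so a stalled API round-trip degrades to a safe fallback instead of running the GPU task past its time limit. A minimal, self-contained sketch of that pattern (stdlib only; `slow_api_call` and the other names here are illustrative, not from the repo):

import concurrent.futures
import time

# A small dedicated pool keeps network-bound work off the GPU-budgeted code path.
_executor = concurrent.futures.ThreadPoolExecutor(max_workers=2, thread_name_prefix="offload")


def slow_api_call(query: str) -> str:
    """Stand-in for a blocking LLM/API request."""
    time.sleep(60)
    return f"answer for: {query}"


def run_in_thread(fn, *args, timeout=30.0, fallback=None, **kwargs):
    """Submit fn to the pool; on timeout, return a caller-supplied fallback."""
    future = _executor.submit(fn, *args, **kwargs)
    try:
        return future.result(timeout=timeout)
    except concurrent.futures.TimeoutError:
        return fallback


print(run_in_thread(slow_api_call, "chest pain", timeout=1.0, fallback="(timeout fallback)"))
# -> "(timeout fallback)" after ~1 s; note the worker thread is abandoned, not killed.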
agent.py CHANGED
@@ -67,7 +67,7 @@ GEMINI_MAX_TOTAL_FILE_SIZE = int(os.environ.get("GEMINI_MAX_TOTAL_FILE_SIZE", "5
 GEMINI_TEMPERATURE = float(os.environ.get("GEMINI_TEMPERATURE", "0.2"))
 
 # Initialize MCP server
-server = Server("gemini-mcp-server")
+server = Server("server-mcp-agent")
 
 def decode_base64_file(content: str, mime_type: str = None) -> bytes:
     """Decode base64 encoded file content"""
models.py CHANGED
@@ -18,20 +18,19 @@ except ImportError:
 _model_loading_states = {}
 _model_loading_lock = threading.Lock()
 
-
+@spaces.GPU(max_duration=120)
 def set_model_loading_state(model_name: str, state: str):
     """Set model loading state: 'loading', 'loaded', 'error'"""
     with _model_loading_lock:
         _model_loading_states[model_name] = state
         logger.debug(f"Model {model_name} state set to: {state}")
 
-
+@spaces.GPU(max_duration=120)
 def get_model_loading_state(model_name: str) -> str:
     """Get model loading state: 'loading', 'loaded', 'error', or 'unknown'"""
     with _model_loading_lock:
         return _model_loading_states.get(model_name, "unknown")
 
-
 def is_model_loaded(model_name: str) -> bool:
     """Check if model is loaded and ready"""
     with _model_loading_lock:
@@ -39,7 +38,7 @@ def is_model_loaded(model_name: str) -> bool:
             config.global_medical_models[model_name] is not None and
             _model_loading_states.get(model_name) == "loaded")
 
-
+@spaces.GPU(max_duration=120)
 def initialize_medical_model(model_name: str):
     """Initialize medical model (MedSwin) - download on demand"""
     if model_name not in config.global_medical_models or config.global_medical_models[model_name] is None:
@@ -69,7 +68,7 @@ def initialize_medical_model(model_name: str):
     set_model_loading_state(model_name, "loaded")
     return config.global_medical_models[model_name], config.global_medical_tokenizers[model_name]
 
-
+@spaces.GPU(max_duration=120)
 def initialize_tts_model():
     """Initialize TTS model for text-to-speech"""
     if not TTS_AVAILABLE:
@@ -86,7 +85,7 @@ def initialize_tts_model():
         config.global_tts_model = None
     return config.global_tts_model
 
-
+@spaces.GPU(max_duration=120)
 def get_or_create_embed_model():
     """Reuse embedding model to avoid reloading weights each request"""
     if config.global_embed_model is None:
@@ -94,7 +93,7 @@ def get_or_create_embed_model():
         config.global_embed_model = HuggingFaceEmbedding(model_name=config.EMBEDDING_MODEL, token=config.HF_TOKEN)
     return config.global_embed_model
 
-
+@spaces.GPU(max_duration=120)
 def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
     """Get LLM for RAG indexing (uses medical model)"""
     medical_model_obj, medical_tokenizer = initialize_medical_model(config.DEFAULT_MEDICAL_MODEL)
pipeline.py CHANGED
@@ -29,6 +29,84 @@ MAX_CLINICAL_QA_ROUNDS = 5
 _clinical_intake_sessions = {}
 _clinical_intake_lock = threading.Lock()
 
+# Thread pool executor for running Gemini supervisor calls without blocking GPU task
+_gemini_executor = concurrent.futures.ThreadPoolExecutor(max_workers=2, thread_name_prefix="gemini-supervisor")
+
+
+def run_gemini_in_thread(fn, *args, **kwargs):
+    """
+    Run Gemini supervisor function in a separate thread to avoid blocking GPU task.
+    This ensures Gemini API calls don't consume GPU task time and cause timeouts.
+    """
+    try:
+        future = _gemini_executor.submit(fn, *args, **kwargs)
+        # Set a reasonable timeout (30s) to prevent hanging
+        result = future.result(timeout=30.0)
+        return result
+    except concurrent.futures.TimeoutError:
+        logger.error(f"[GEMINI SUPERVISOR] Function {fn.__name__} timed out after 30s")
+        # Return fallback based on function
+        if "breakdown" in fn.__name__:
+            return {
+                "sub_topics": [
+                    {"id": 1, "topic": "Answer", "instruction": args[0] if args else "Address the question", "expected_tokens": 400, "priority": "high", "approach": "direct answer"}
+                ],
+                "strategy": "Direct answer (timeout fallback)",
+                "exploration_note": "Gemini supervisor timeout"
+            }
+        elif "search_strategies" in fn.__name__:
+            return {
+                "search_strategies": [
+                    {"id": 1, "strategy": args[0] if args else "", "target_sources": 2, "focus": "main query"}
+                ],
+                "max_strategies": 1
+            }
+        elif "rag_brainstorm" in fn.__name__:
+            return {
+                "contexts": [
+                    {"id": 1, "context": args[1][:500] if len(args) > 1 else "", "focus": "retrieved information", "relevance": "high"}
+                ],
+                "max_contexts": 1
+            }
+        elif "synthesize" in fn.__name__:
+            return "\n\n".join(args[1] if len(args) > 1 else [])
+        elif "challenge" in fn.__name__:
+            return {"is_optimal": True, "completeness_score": 7, "accuracy_score": 7, "clarity_score": 7, "missing_aspects": [], "inaccuracies": [], "improvement_suggestions": [], "needs_more_context": False, "enhancement_instructions": ""}
+        elif "enhance_answer" in fn.__name__:
+            return args[1] if len(args) > 1 else ""
+        elif "check_clarity" in fn.__name__:
+            return {"is_unclear": False, "needs_search": False, "search_queries": []}
+        elif "clinical_intake_triage" in fn.__name__:
+            return {
+                "needs_additional_info": False,
+                "decision_reason": "Timeout fallback",
+                "max_rounds": args[2] if len(args) > 2 else 5,
+                "questions": [],
+                "initial_hypotheses": []
+            }
+        elif "summarize_clinical_insights" in fn.__name__:
+            return {
+                "patient_profile": "",
+                "refined_problem_statement": args[0] if args else "",
+                "key_findings": [],
+                "handoff_note": "Proceed with regular workflow."
+            }
+        else:
+            logger.warning(f"[GEMINI SUPERVISOR] Unknown function {fn.__name__}, returning None")
+            return None
+    except Exception as e:
+        logger.error(f"[GEMINI SUPERVISOR] Error running {fn.__name__} in thread: {e}")
+        # Return appropriate fallback
+        if "breakdown" in fn.__name__:
+            return {
+                "sub_topics": [
+                    {"id": 1, "topic": "Answer", "instruction": args[0] if args else "Address the question", "expected_tokens": 400, "priority": "high", "approach": "direct answer"}
+                ],
+                "strategy": "Direct answer (error fallback)",
+                "exploration_note": "Gemini supervisor error"
+            }
+        return None
+
 
 def _get_clinical_intake_state(session_id: str):
     with _clinical_intake_lock:
@@ -56,7 +134,6 @@ def _history_to_text(history: list, limit: int = 6) -> str:
         lines.append(f"{role}: {content}")
     return "\n".join(lines)
 
-
 def _format_intake_question(question: dict, round_idx: int, max_rounds: int, target_lang: str) -> str:
     header = f"🩺 Question for clarity {round_idx}/{max_rounds}"
     body = question.get("question") or "Could you share a bit more detail so I can give an accurate answer?"
@@ -214,7 +291,8 @@ def _handle_clinical_answer(session_id: str, answer_text: str):
     next_index = idx + 1
     reached_round_limit = len(state["answers"]) >= state["max_rounds"]
     if reached_round_limit or next_index >= len(questions):
-        insights = gemini_summarize_clinical_insights(state["base_query"], state["answers"])
+        # Run in thread pool to avoid blocking GPU task
+        insights = run_gemini_in_thread(gemini_summarize_clinical_insights, state["base_query"], state["answers"])
         insights_block = _format_insights_block(insights)
         refined_query = _build_refined_query(state["base_query"], insights, insights_block)
         transcript = _format_qa_transcript(state["answers"])
@@ -384,7 +462,8 @@ def stream_chat(
         clinical_intake_context_block = "\n\n".join([seg for seg in [summary_section, transcript_section] if seg])
     else:
         history_context = _history_to_text(history)
-        triage_plan = gemini_clinical_intake_triage(message, history_context, MAX_CLINICAL_QA_ROUNDS)
+        # Run in thread pool to avoid blocking GPU task
+        triage_plan = run_gemini_in_thread(gemini_clinical_intake_triage, message, history_context, MAX_CLINICAL_QA_ROUNDS)
     pipeline_diagnostics["clinical_intake"]["reason"] = triage_plan.get("decision_reason", "")
     pipeline_diagnostics["clinical_intake"]["plan"] = triage_plan.get("questions", [])
     needs_intake = triage_plan.get("needs_additional_info") and triage_plan.get("questions")
@@ -448,7 +527,10 @@ def stream_chat(
         }
     else:
         logger.info("[GEMINI SUPERVISOR] Breaking query into sub-topics...")
-        breakdown = gemini_supervisor_breakdown(message, final_use_rag, final_use_web_search, elapsed(), max_duration=120)
+        # Run in thread pool to avoid blocking GPU task
+        breakdown = run_gemini_in_thread(
+            gemini_supervisor_breakdown, message, final_use_rag, final_use_web_search, elapsed(), 120
+        )
         logger.info(f"[GEMINI SUPERVISOR] Created {len(breakdown.get('sub_topics', []))} sub-topics")
 
         # Update thoughts after breakdown
@@ -460,7 +542,8 @@ def stream_chat(
     if final_use_web_search:
         search_stage_start = time.time()
         logger.info("[GEMINI SUPERVISOR] Search mode: Creating search strategies...")
-        search_strategies = gemini_supervisor_search_strategies(message, elapsed())
+        # Run in thread pool to avoid blocking GPU task
+        search_strategies = run_gemini_in_thread(gemini_supervisor_search_strategies, message, elapsed())
 
         all_search_results = []
         strategy_jobs = []
@@ -552,7 +635,8 @@ def stream_chat(
         logger.info(f"[GEMINI SUPERVISOR] Retrieved {len(merged_nodes)} document nodes")
 
         logger.info("[GEMINI SUPERVISOR] Brainstorming RAG contexts...")
-        rag_brainstorm = gemini_supervisor_rag_brainstorm(message, retrieved_docs, elapsed())
+        # Run in thread pool to avoid blocking GPU task
+        rag_brainstorm = run_gemini_in_thread(gemini_supervisor_rag_brainstorm, message, retrieved_docs, elapsed())
         rag_contexts = [ctx.get("context", "") for ctx in rag_brainstorm.get("contexts", [])]
         logger.info(f"[GEMINI SUPERVISOR] Created {len(rag_contexts)} RAG contexts")
         record_stage("rag_retrieval", rag_stage_start)
@@ -630,7 +714,10 @@ def stream_chat(
     logger.info("[GEMINI SUPERVISOR] Synthesizing final answer from all MedSwin responses...")
     raw_medswin_answers = [ans.split('\n\n', 1)[1] if '\n\n' in ans else ans for ans in medswin_answers]
     synthesis_stage_start = time.time()
-    final_answer = gemini_supervisor_synthesize(message, raw_medswin_answers, rag_contexts, search_contexts, breakdown)
+    # Run in thread pool to avoid blocking GPU task
+    final_answer = run_gemini_in_thread(
+        gemini_supervisor_synthesize, message, raw_medswin_answers, rag_contexts, search_contexts, breakdown
+    )
    record_stage("synthesis", synthesis_stage_start)
 
     if not final_answer or len(final_answer.strip()) < 50:
@@ -658,7 +745,10 @@ def stream_chat(
         challenge_iteration += 1
         logger.info(f"[GEMINI SUPERVISOR] Challenge iteration {challenge_iteration}/{max_challenge_iterations}...")
 
-        evaluation = gemini_supervisor_challenge(message, final_answer, raw_medswin_answers, rag_contexts, search_contexts)
+        # Run in thread pool to avoid blocking GPU task
+        evaluation = run_gemini_in_thread(
+            gemini_supervisor_challenge, message, final_answer, raw_medswin_answers, rag_contexts, search_contexts
+        )
 
         if evaluation.get("is_optimal", False):
             logger.info(f"[GEMINI SUPERVISOR] Answer confirmed optimal after {challenge_iteration} iteration(s)")
@@ -670,8 +760,9 @@ def stream_chat(
             break
 
         logger.info(f"[GEMINI SUPERVISOR] Enhancing answer based on feedback...")
-        enhanced_answer = gemini_supervisor_enhance_answer(
-            message, final_answer, enhancement_instructions, raw_medswin_answers, rag_contexts, search_contexts
+        # Run in thread pool to avoid blocking GPU task
+        enhanced_answer = run_gemini_in_thread(
+            gemini_supervisor_enhance_answer, message, final_answer, enhancement_instructions, raw_medswin_answers, rag_contexts, search_contexts
         )
 
         if enhanced_answer and len(enhanced_answer.strip()) > len(final_answer.strip()) * 0.8:
@@ -685,7 +776,8 @@ def stream_chat(
     if final_use_web_search and elapsed() < soft_timeout - 10:
         logger.info("[GEMINI SUPERVISOR] Checking if additional search is needed...")
         clarity_stage_start = time.time()
-        clarity_check = gemini_supervisor_check_clarity(message, final_answer, final_use_web_search)
+        # Run in thread pool to avoid blocking GPU task
+        clarity_check = run_gemini_in_thread(gemini_supervisor_check_clarity, message, final_answer, final_use_web_search)
         record_stage("clarity_check", clarity_stage_start)
 
     if clarity_check.get("needs_search", False) and clarity_check.get("search_queries"):
@@ -715,8 +807,9 @@ def stream_chat(
         if additional_summary:
             search_contexts.append(additional_summary)
             logger.info("[GEMINI SUPERVISOR] Enhancing answer with additional search context...")
-            enhanced_with_search = gemini_supervisor_enhance_answer(
-                message, final_answer,
+            # Run in thread pool to avoid blocking GPU task
+            enhanced_with_search = run_gemini_in_thread(
+                gemini_supervisor_enhance_answer, message, final_answer,
                 f"Incorporate the following additional information from web search: {additional_summary}",
                 raw_medswin_answers, rag_contexts, search_contexts
             )
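To see the new helper end to end: when a wrapped supervisor call exceeds the 30 s cap, the caller receives the function-specific stub rather than an exception. A hypothetical run (assumes `run_gemini_in_thread` from the hunk above is importable on its own; the sleeping stub stands in for a stalled Gemini round-trip):

import time

def gemini_supervisor_check_clarity(query: str, answer: str, use_web_search: bool) -> dict:
    """Stub that simulates a Gemini call hanging past the 30 s cap."""
    time.sleep(60)
    return {"is_unclear": True, "needs_search": True, "search_queries": ["never reached"]}

clarity = run_gemini_in_thread(gemini_supervisor_check_clarity, "q", "a", True)
assert clarity == {"is_unclear": False, "needs_search": False, "search_queries": []}
# "check_clarity" matches fn.__name__, so the timeout branch returns the safe
# default and the pipeline skips the extra search round instead of hanging.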
reasoning.py CHANGED
@@ -10,7 +10,6 @@ try:
 except ImportError:
     nest_asyncio = None
 
-
 async def autonomous_reasoning_gemini(query: str) -> dict:
     """Autonomous reasoning using Gemini MCP"""
     reasoning_prompt = f"""Analyze this medical query and provide structured reasoning:
@@ -75,7 +74,6 @@ Respond in JSON format:
     logger.info(f"Reasoning analysis: {reasoning}")
     return reasoning
 
-
 def autonomous_reasoning(query: str, history: list) -> dict:
     """Autonomous reasoning: Analyze query complexity, intent, and information needs"""
     if not MCP_AVAILABLE:
@@ -113,7 +111,6 @@ def autonomous_reasoning(query: str, history: list) -> dict:
         "sub_questions": [query]
     }
 
-
 def create_execution_plan(reasoning: dict, query: str, has_rag_index: bool) -> dict:
     """Planning: Create multi-step execution plan based on reasoning analysis"""
     plan = {
@@ -172,7 +169,6 @@ def create_execution_plan(reasoning: dict, query: str, has_rag_index: bool) -> d
     logger.info(f"Execution plan created: {len(plan['steps'])} steps")
     return plan
 
-
 def autonomous_execution_strategy(reasoning: dict, plan: dict, use_rag: bool, use_web_search: bool, has_rag_index: bool) -> dict:
     """Autonomous execution: Make decisions on information gathering strategy"""
     strategy = {
requirements.txt CHANGED
@@ -13,6 +13,7 @@ google-genai
 langdetect
 gradio
 gradio[mcp]
+fastmcp
 # MCP dependencies (required for Gemini MCP)
 # Install MCP SDK: pip install mcp
 # The MCP package provides Model Context Protocol server and client functionality
search.py CHANGED
@@ -97,7 +97,6 @@ async def search_web_mcp_tool(query: str, max_results: int = MAX_SEARCH_STRATEGI
         logger.error(f"Web search MCP tool error: {e}")
         return []
 
-
 async def search_web_mcp(query: str, max_results: int = MAX_SEARCH_STRATEGIES) -> list:
     """Search web using MCP tools - tries web search MCP tool first, then falls back to direct search"""
     results = await search_web_mcp_tool(query, max_results)
@@ -168,8 +167,7 @@ def search_web_fallback(query: str, max_results: int = MAX_SEARCH_STRATEGIES) ->
         logger.error(f"❌ [Direct API] Web search error: {e}")
     return []
 
-
-def search_web(query: str, max_results: int = 5) -> list:
+def search_web(query: str, max_results: int = MAX_SEARCH_STRATEGIES) -> list:
     """Search web using MCP tools (synchronous wrapper) - prioritizes MCP over direct ddgs"""
     if MCP_AVAILABLE:
         try:
@@ -200,7 +198,6 @@ def search_web(query: str, max_results: int = 5) -> list:
     logger.info("ℹ️ [Direct API] Falling back to direct DuckDuckGo search (MCP unavailable or returned no results)")
     return search_web_fallback(query, max_results)
 
-
 async def summarize_web_content_gemini(content_list: list, query: str) -> str:
     """Summarize web search results using Gemini MCP"""
     combined_content = "\n\n".join([f"Source: {item['title']}\n{item['content']}" for item in content_list[:3]])
@@ -222,7 +219,6 @@ Summary:"""
 
     return result.strip()
 
-
 def summarize_web_content(content_list: list, query: str) -> str:
     """Summarize web search results using Gemini MCP"""
     if not MCP_AVAILABLE:
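The only behavioral change here is that `search_web` now defaults `max_results` to MAX_SEARCH_STRATEGIES instead of a hard-coded 5, matching the async helpers. For reference, the MCP-first-with-fallback shape these functions follow looks roughly like this (a schematic sketch, not the repo's code):

def search_with_fallback(query: str, primary, fallback, max_results: int = 5) -> list:
    """Try the primary (MCP) searcher; fall back to the direct API on error or empty results."""
    try:
        results = primary(query, max_results)
        if results:
            return results
    except Exception:
        pass  # swallow and fall through to the direct search
    return fallback(query, max_results)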
supervisor.py CHANGED
@@ -18,7 +18,6 @@ try:
 except ImportError:
     nest_asyncio = None
 
-
 async def gemini_supervisor_breakdown_async(query: str, use_rag: bool, use_web_search: bool, time_elapsed: float, max_duration: int = 120) -> dict:
     """Gemini Supervisor: Break user query into sub-topics"""
     remaining_time = max(15, max_duration - time_elapsed)
@@ -113,7 +112,6 @@ Guidelines:
     logger.warning(f"[GEMINI SUPERVISOR] Using fallback breakdown")
     return breakdown
 
-
 async def gemini_supervisor_search_strategies_async(query: str, time_elapsed: float) -> dict:
     """Gemini Supervisor: In search mode, break query into searching strategies"""
     prompt = f"""You are supervising web search for a medical query.
@@ -164,7 +162,6 @@ Keep strategies focused and avoid overlap."""
         "max_strategies": 1
     }
 
-
 def _prepare_clinical_question_plan(plan: dict, safe_rounds: int) -> dict:
     """Normalize Gemini question plan to 1-5 sequential prompts."""
     if not isinstance(plan, dict):
@@ -201,7 +198,6 @@ def _prepare_clinical_question_plan(plan: dict, safe_rounds: int) -> dict:
         plan["max_rounds"] = 0
     return plan
 
-
 async def gemini_supervisor_rag_brainstorm_async(query: str, retrieved_docs: str, time_elapsed: float) -> dict:
     """Gemini Supervisor: In RAG mode, brainstorm retrieved documents into 1-4 short contexts"""
     max_doc_length = 3000
@@ -258,7 +254,6 @@ Keep contexts brief and factual. Avoid redundancy."""
         "max_contexts": 1
     }
 
-
 async def gemini_clinical_intake_triage_async(
     query: str,
     history_context: str,
@@ -329,7 +324,6 @@ Guidelines:
         "initial_hypotheses": []
     }
 
-
 def gemini_clinical_intake_triage(
     query: str,
     history_context: str,
@@ -367,7 +361,6 @@ def gemini_clinical_intake_triage(
         "initial_hypotheses": []
     }
 
-
 async def gemini_summarize_clinical_insights_async(
     query: str,
     qa_pairs: list
@@ -427,7 +420,6 @@ Guidelines:
         "handoff_note": "Proceed with regular workflow."
     }
 
-
 def gemini_summarize_clinical_insights(query: str, qa_pairs: list) -> dict:
     """Wrapper for synchronous clinical insight summarization"""
     if not MCP_AVAILABLE:
@@ -463,7 +455,6 @@ def gemini_summarize_clinical_insights(query: str, qa_pairs: list) -> dict:
         "handoff_note": "Proceed with regular workflow."
     }
 
-
 def gemini_supervisor_breakdown(query: str, use_rag: bool, use_web_search: bool, time_elapsed: float, max_duration: int = 120) -> dict:
     """Wrapper to obtain supervisor breakdown synchronously"""
     if not MCP_AVAILABLE:
@@ -505,7 +496,6 @@ def gemini_supervisor_breakdown(query: str, use_rag: bool, use_web_search: bool,
         "exploration_note": "Fallback breakdown - single topic"
     }
 
-
 def gemini_supervisor_search_strategies(query: str, time_elapsed: float) -> dict:
     """Wrapper to obtain search strategies synchronously"""
     if not MCP_AVAILABLE:
@@ -534,7 +524,6 @@ def gemini_supervisor_search_strategies(query: str, time_elapsed: float) -> dict
         "max_strategies": 1
     }
 
-
 def gemini_supervisor_rag_brainstorm(query: str, retrieved_docs: str, time_elapsed: float) -> dict:
     """Wrapper to obtain RAG brainstorm synchronously"""
     if not MCP_AVAILABLE:
@@ -666,7 +655,6 @@ def execute_medswin_task(
         logger.error(f"[MEDSWIN] Task failed after {attempt + 1} attempts: {e}")
         raise
 
-
 async def gemini_supervisor_synthesize_async(query: str, medswin_answers: list, rag_contexts: list, search_contexts: list, breakdown: dict) -> str:
     """Gemini Supervisor: Synthesize final answer from all MedSwin responses"""
     context_summary = ""
@@ -709,7 +697,6 @@ Return the final synthesized answer in Markdown format. Do not add meta-commenta
 
     return result.strip()
 
-
 async def gemini_supervisor_challenge_async(query: str, current_answer: str, medswin_answers: list, rag_contexts: list, search_contexts: list) -> dict:
     """Gemini Supervisor: Challenge and evaluate the current answer"""
     context_info = ""
@@ -785,7 +772,6 @@ Return ONLY valid JSON:
         "enhancement_instructions": ""
     }
 
-
 async def gemini_supervisor_enhance_answer_async(query: str, current_answer: str, enhancement_instructions: str, medswin_answers: list, rag_contexts: list, search_contexts: list) -> str:
     """Gemini Supervisor: Enhance the answer based on challenge feedback"""
     context_info = ""
@@ -833,7 +819,6 @@ Return the enhanced answer in Markdown format. Do not add meta-commentary."""
 
     return result.strip()
 
-
 async def gemini_supervisor_check_clarity_async(query: str, answer: str, use_web_search: bool) -> dict:
     """Gemini Supervisor: Check if answer is unclear or supervisor is unsure"""
     if not use_web_search:
@@ -884,7 +869,6 @@ Only suggest search if the answer is genuinely unclear or has significant gaps t
         logger.error(f"[GEMINI SUPERVISOR] Clarity check parsing failed: {exc}")
         return {"is_unclear": False, "needs_search": False, "search_queries": []}
 
-
 def gemini_supervisor_synthesize(query: str, medswin_answers: list, rag_contexts: list, search_contexts: list, breakdown: dict) -> str:
     """Wrapper to synthesize answer synchronously"""
     if not MCP_AVAILABLE:
@@ -921,7 +905,6 @@ def gemini_supervisor_challenge(query: str, current_answer: str, medswin_answers
         logger.error(f"[GEMINI SUPERVISOR] Challenge failed: {exc}")
         return {"is_optimal": True, "completeness_score": 7, "accuracy_score": 7, "clarity_score": 7, "missing_aspects": [], "inaccuracies": [], "improvement_suggestions": [], "needs_more_context": False, "enhancement_instructions": ""}
 
-
 def gemini_supervisor_enhance_answer(query: str, current_answer: str, enhancement_instructions: str, medswin_answers: list, rag_contexts: list, search_contexts: list) -> str:
     """Wrapper to enhance answer synchronously"""
     if not MCP_AVAILABLE:
@@ -939,7 +922,6 @@ def gemini_supervisor_enhance_answer(query: str, current_answer: str, enhancemen
         logger.error(f"[GEMINI SUPERVISOR] Enhancement failed: {exc}")
         return current_answer
 
-
 def gemini_supervisor_check_clarity(query: str, answer: str, use_web_search: bool) -> dict:
     """Wrapper to check clarity synchronously"""
     if not MCP_AVAILABLE or not use_web_search:
@@ -957,7 +939,6 @@ def gemini_supervisor_check_clarity(query: str, answer: str, use_web_search: boo
         logger.error(f"[GEMINI SUPERVISOR] Clarity check failed: {exc}")
         return {"is_unclear": False, "needs_search": False, "search_queries": []}
 
-
 async def self_reflection_gemini(answer: str, query: str) -> dict:
     """Self-reflection using Gemini MCP"""
     reflection_prompt = f"""Evaluate this medical answer for quality and completeness:
@@ -1001,7 +982,6 @@ Respond in JSON:
     logger.info(f"Self-reflection score: {reflection.get('overall_score', 'N/A')}")
     return reflection
 
-
 def self_reflection(answer: str, query: str, reasoning: dict) -> dict:
     """Self-reflection: Evaluate answer quality and completeness"""
     if not MCP_AVAILABLE:
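supervisor.py itself only loses blank lines in this commit, but it hosts the `*_async` supervisors and the synchronous wrappers that pipeline.py now calls through `run_gemini_in_thread`. The wrapper bodies fall outside the hunks shown; given the `nest_asyncio` import in the context lines, a plausible shape for that sync-over-async bridge (an assumption, not code from this commit) is:

import asyncio

try:
    import nest_asyncio  # allows asyncio.run() inside an already-running event loop
except ImportError:
    nest_asyncio = None


async def _supervise_async(query: str) -> dict:
    await asyncio.sleep(0.1)  # stands in for a Gemini MCP round-trip
    return {"answer": query}


def supervise(query: str) -> dict:
    """Synchronous wrapper: patch the loop if possible, run the coroutine, degrade safely."""
    if nest_asyncio is not None:
        nest_asyncio.apply()
    try:
        return asyncio.run(_supervise_async(query))
    except Exception:
        return {"answer": ""}  # conservative fallback, mirroring the wrappers above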
utils.py CHANGED
@@ -119,7 +119,6 @@ def format_url_as_domain(url: str) -> str:
         return domain
     return url
 
-
 async def translate_text_gemini(text: str, target_lang: str = "en", source_lang: str = None) -> str:
     """Translate text using Gemini MCP"""
     if source_lang:
@@ -138,7 +137,6 @@ async def translate_text_gemini(text: str, target_lang: str = "en", source_lang:
 
     return result.strip()
 
-
 def translate_text(text: str, target_lang: str = "en", source_lang: str = None) -> str:
     """Translate text using Gemini MCP"""
     if not MCP_AVAILABLE:
@@ -163,5 +161,4 @@ def translate_text(text: str, target_lang: str = "en", source_lang: str = None)
     except Exception as e:
         logger.error(f"Gemini MCP translation error: {e}")
 
-    return text
-
+    return text
voice.py CHANGED
@@ -13,7 +13,6 @@ try:
 except ImportError:
     nest_asyncio = None
 
-
 async def transcribe_audio_gemini(audio_path: str) -> str:
     """Transcribe audio using Gemini MCP transcribe_audio tool"""
     if not MCP_AVAILABLE:
@@ -70,7 +69,6 @@ async def transcribe_audio_gemini(audio_path: str) -> str:
         logger.error(f"Gemini transcription error: {e}")
         return ""
 
-
 def transcribe_audio(audio):
     """Transcribe audio to text using Gemini MCP"""
     if audio is None:
@@ -112,7 +110,6 @@ def transcribe_audio(audio):
         logger.error(f"Transcription error: {e}")
         return ""
 
-
 async def generate_speech_mcp(text: str) -> str:
     """Generate speech using MCP text_to_speech tool"""
     if not MCP_AVAILABLE:
@@ -166,7 +163,6 @@ async def generate_speech_mcp(text: str) -> str:
         logger.warning(f"MCP TTS error: {e}")
         return None
 
-
 def generate_speech(text: str):
     """Generate speech from text using TTS model (with MCP fallback)"""
     if not text or len(text.strip()) == 0: