Y Phung Nguyen committed · Commit 590a3e5
Parent(s): 4bc9414

Upd history followup

Files changed:
- pipeline.py +32 -1
- supervisor.py +57 -6
pipeline.py
CHANGED
@@ -237,6 +237,28 @@ def _rehydrate_intake_state(session_id: str, history: list):
     return None


+def _get_last_assistant_answer(history: list) -> str:
+    """
+    Extract the last non-empty assistant answer from history.
+    Skips clinical intake clarification prompts so that follow-up
+    questions like "clarify your answer" refer to the real medical
+    answer, not an intake question.
+    """
+    if not history:
+        return ""
+    for turn in reversed(history):
+        if turn.get("role") != "assistant":
+            continue
+        content = (turn.get("content") or "").strip()
+        if not content:
+            continue
+        # Skip intake prompts that start with the standard header
+        if content.startswith("🩺 Question for clarity"):
+            continue
+        return content
+    return ""
+
+
 def _start_clinical_intake_session(session_id: str, plan: dict, base_query: str, original_language: str):
     questions = plan.get("questions", []) or []
     if not questions:
@@ -530,9 +552,18 @@ def stream_chat(
             }
         else:
             logger.info("[GEMINI SUPERVISOR] Breaking query into sub-topics...")
+            # Provide previous assistant answer as context so Gemini can
+            # interpret follow-up queries like "clarify your answer".
+            previous_answer = _get_last_assistant_answer(history)
             # Run in thread pool to avoid blocking GPU task
             breakdown = run_gemini_in_thread(
-                gemini_supervisor_breakdown,
+                gemini_supervisor_breakdown,
+                message,
+                final_use_rag,
+                final_use_web_search,
+                elapsed(),
+                120,
+                previous_answer,
             )
             logger.info(f"[GEMINI SUPERVISOR] Created {len(breakdown.get('sub_topics', []))} sub-topics")

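The new helper only inspects role/content pairs, so its expected behavior can be sketched in isolation. A minimal, hypothetical check (the history entries and the import path are illustrative assumptions, not taken from the app):

# Illustrative check of _get_last_assistant_answer (hypothetical history values).
from pipeline import _get_last_assistant_answer

history = [
    {"role": "user", "content": "What are treatment options for migraine?"},
    {"role": "assistant", "content": "First-line options include ..."},
    {"role": "assistant", "content": "🩺 Question for clarity: how long have the headaches lasted?"},
    {"role": "user", "content": "clarify your answer"},
]

# The intake clarification prompt is skipped, so the real medical answer is returned.
assert _get_last_assistant_answer(history) == "First-line options include ..."
assert _get_last_assistant_answer([]) == ""

This mirrors the intent of the change: a follow-up like "clarify your answer" should resolve against the last substantive answer, not the last intake question.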
supervisor.py
CHANGED
@@ -18,8 +18,20 @@ try:
 except ImportError:
     nest_asyncio = None

-async def gemini_supervisor_breakdown_async(
+async def gemini_supervisor_breakdown_async(
+    query: str,
+    use_rag: bool,
+    use_web_search: bool,
+    time_elapsed: float,
+    max_duration: int = 120,
+    previous_answer: str | None = None,
+) -> dict:
+    """Gemini Supervisor: Break user query into sub-topics.
+
+    previous_answer (optional) is the last assistant answer from the model.
+    When present, Gemini can interpret follow-up queries like "clarify your answer"
+    in the context of that prior response.
+    """
     remaining_time = max(15, max_duration - time_elapsed)

     mode_description = []
@@ -34,7 +46,7 @@ async def gemini_supervisor_breakdown_async(query: str, use_rag: bool, use_web_s
     max_topics_by_time = max(2, int((remaining_time - 20) / estimated_time_per_task))
     max_topics = min(max_topics_by_time, MAX_SUBTASKS)

-    prompt = f"""You are a supervisor agent coordinating with a MedSwin medical specialist model.
+    base_prompt = f"""You are a supervisor agent coordinating with a MedSwin medical specialist model.
 Break the following medical query into focused sub-topics that MedSwin can answer sequentially.
 Explore different potential approaches to comprehensively address the topic.

@@ -42,6 +54,24 @@ Query: "{query}"
 Mode: {', '.join(mode_description)}
 Time Remaining: ~{remaining_time:.1f}s
 Maximum Topics: {max_topics} (adjust based on complexity - use as many as needed for thorough coverage)
+"""
+
+    previous_answer_block = ""
+    if previous_answer:
+        # Truncate to keep prompt bounded
+        trimmed_answer = previous_answer.strip()
+        if len(trimmed_answer) > 2000:
+            trimmed_answer = trimmed_answer[:2000] + "..."
+        previous_answer_block = f"""
+Previous assistant answer (for context if this is a follow-up question):
+\"\"\"{trimmed_answer}\"\"\"
+
+If the new query is a follow-up such as "clarify your answer" or
+"based on the treatment you suggested, what about X?", interpret it
+relative to this previous assistant answer while creating sub-topics.
+"""
+
+    prompt = f"""{base_prompt}{previous_answer_block}

 Return ONLY valid JSON (no markdown, no tables, no explanations):
 {{
@@ -455,7 +485,14 @@ def gemini_summarize_clinical_insights(query: str, qa_pairs: list) -> dict:
         "handoff_note": "Proceed with regular workflow."
     }

-def gemini_supervisor_breakdown(
+def gemini_supervisor_breakdown(
+    query: str,
+    use_rag: bool,
+    use_web_search: bool,
+    time_elapsed: float,
+    max_duration: int = 120,
+    previous_answer: str | None = None,
+) -> dict:
     """Wrapper to obtain supervisor breakdown synchronously"""
     if not MCP_AVAILABLE:
         logger.warning("[GEMINI SUPERVISOR] MCP SDK unavailable, using fallback breakdown")
@@ -474,7 +511,14 @@ def gemini_supervisor_breakdown(query: str, use_rag: bool, use_web_search: bool,
         if nest_asyncio:
             try:
                 return nest_asyncio.run(
-                    gemini_supervisor_breakdown_async(
+                    gemini_supervisor_breakdown_async(
+                        query,
+                        use_rag,
+                        use_web_search,
+                        time_elapsed,
+                        max_duration,
+                        previous_answer,
+                    )
                 )
             except Exception as e:
                 logger.error(f"[GEMINI SUPERVISOR] Async breakdown failed: {e}")
@@ -483,7 +527,14 @@ def gemini_supervisor_breakdown(query: str, use_rag: bool, use_web_search: bool,
             logger.error("[GEMINI SUPERVISOR] Nested breakdown execution failed: nest_asyncio not available")
             raise RuntimeError("nest_asyncio not available")
         return loop.run_until_complete(
-            gemini_supervisor_breakdown_async(
+            gemini_supervisor_breakdown_async(
+                query,
+                use_rag,
+                use_web_search,
+                time_elapsed,
+                max_duration,
+                previous_answer,
+            )
         )
     except Exception as exc:
         logger.error(f"[GEMINI SUPERVISOR] Breakdown request failed: {type(exc).__name__}: {exc}")