Y Phung Nguyen committed · Commit 590a3e5
Parent(s): 4bc9414

Upd history followup

Files changed:
- pipeline.py +32 -1
- supervisor.py +57 -6
pipeline.py
CHANGED
@@ -237,6 +237,28 @@ def _rehydrate_intake_state(session_id: str, history: list):
     return None


+def _get_last_assistant_answer(history: list) -> str:
+    """
+    Extract the last non-empty assistant answer from history.
+    Skips clinical intake clarification prompts so that follow-up
+    questions like "clarify your answer" refer to the real medical
+    answer, not an intake question.
+    """
+    if not history:
+        return ""
+    for turn in reversed(history):
+        if turn.get("role") != "assistant":
+            continue
+        content = (turn.get("content") or "").strip()
+        if not content:
+            continue
+        # Skip intake prompts that start with the standard header
+        if content.startswith("🩺 Question for clarity"):
+            continue
+        return content
+    return ""
+
+
 def _start_clinical_intake_session(session_id: str, plan: dict, base_query: str, original_language: str):
     questions = plan.get("questions", []) or []
     if not questions:
@@ -530,9 +552,18 @@ def stream_chat(
             }
         else:
             logger.info("[GEMINI SUPERVISOR] Breaking query into sub-topics...")
+            # Provide previous assistant answer as context so Gemini can
+            # interpret follow-up queries like "clarify your answer".
+            previous_answer = _get_last_assistant_answer(history)
             # Run in thread pool to avoid blocking GPU task
             breakdown = run_gemini_in_thread(
-                gemini_supervisor_breakdown,
+                gemini_supervisor_breakdown,
+                message,
+                final_use_rag,
+                final_use_web_search,
+                elapsed(),
+                120,
+                previous_answer,
             )
             logger.info(f"[GEMINI SUPERVISOR] Created {len(breakdown.get('sub_topics', []))} sub-topics")

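The new helper only inspects role/content pairs, so its expected behavior can be sketched in isolation. A minimal, hypothetical check (the history entries and the import path are illustrative assumptions, not taken from the app):

# Illustrative check of _get_last_assistant_answer (hypothetical history values).
from pipeline import _get_last_assistant_answer

history = [
    {"role": "user", "content": "What are treatment options for migraine?"},
    {"role": "assistant", "content": "First-line options include ..."},
    {"role": "assistant", "content": "🩺 Question for clarity: how long have the headaches lasted?"},
    {"role": "user", "content": "clarify your answer"},
]

# The intake clarification prompt is skipped, so the real medical answer is returned.
assert _get_last_assistant_answer(history) == "First-line options include ..."
assert _get_last_assistant_answer([]) == ""

This mirrors the intent of the change: a follow-up like "clarify your answer" should resolve against the last substantive answer, not the last intake question.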
supervisor.py
CHANGED
@@ -18,8 +18,20 @@ try:
 except ImportError:
     nest_asyncio = None

-async def gemini_supervisor_breakdown_async(
+async def gemini_supervisor_breakdown_async(
+    query: str,
+    use_rag: bool,
+    use_web_search: bool,
+    time_elapsed: float,
+    max_duration: int = 120,
+    previous_answer: str | None = None,
+) -> dict:
+    """Gemini Supervisor: Break user query into sub-topics.
+
+    previous_answer (optional) is the last assistant answer from the model.
+    When present, Gemini can interpret follow-up queries like "clarify your answer"
+    in the context of that prior response.
+    """
     remaining_time = max(15, max_duration - time_elapsed)

     mode_description = []
@@ -34,7 +46,7 @@ async def gemini_supervisor_breakdown_async(query: str, use_rag: bool, use_web_s
     max_topics_by_time = max(2, int((remaining_time - 20) / estimated_time_per_task))
     max_topics = min(max_topics_by_time, MAX_SUBTASKS)

-    prompt = f"""You are a supervisor agent coordinating with a MedSwin medical specialist model.
+    base_prompt = f"""You are a supervisor agent coordinating with a MedSwin medical specialist model.
 Break the following medical query into focused sub-topics that MedSwin can answer sequentially.
 Explore different potential approaches to comprehensively address the topic.

@@ -42,6 +54,24 @@ Query: "{query}"
 Mode: {', '.join(mode_description)}
 Time Remaining: ~{remaining_time:.1f}s
 Maximum Topics: {max_topics} (adjust based on complexity - use as many as needed for thorough coverage)
+"""
+
+    previous_answer_block = ""
+    if previous_answer:
+        # Truncate to keep prompt bounded
+        trimmed_answer = previous_answer.strip()
+        if len(trimmed_answer) > 2000:
+            trimmed_answer = trimmed_answer[:2000] + "..."
+        previous_answer_block = f"""
+Previous assistant answer (for context if this is a follow-up question):
+\"\"\"{trimmed_answer}\"\"\"
+
+If the new query is a follow-up such as "clarify your answer" or
+"based on the treatment you suggested, what about X?", interpret it
+relative to this previous assistant answer while creating sub-topics.
+"""
+
+    prompt = f"""{base_prompt}{previous_answer_block}

 Return ONLY valid JSON (no markdown, no tables, no explanations):
 {{
@@ -455,7 +485,14 @@ def gemini_summarize_clinical_insights(query: str, qa_pairs: list) -> dict:
         "handoff_note": "Proceed with regular workflow."
     }

-def gemini_supervisor_breakdown(
+def gemini_supervisor_breakdown(
+    query: str,
+    use_rag: bool,
+    use_web_search: bool,
+    time_elapsed: float,
+    max_duration: int = 120,
+    previous_answer: str | None = None,
+) -> dict:
     """Wrapper to obtain supervisor breakdown synchronously"""
     if not MCP_AVAILABLE:
         logger.warning("[GEMINI SUPERVISOR] MCP SDK unavailable, using fallback breakdown")
@@ -474,7 +511,14 @@ def gemini_supervisor_breakdown(query: str, use_rag: bool, use_web_search: bool,
         if nest_asyncio:
             try:
                 return nest_asyncio.run(
-                    gemini_supervisor_breakdown_async(
+                    gemini_supervisor_breakdown_async(
+                        query,
+                        use_rag,
+                        use_web_search,
+                        time_elapsed,
+                        max_duration,
+                        previous_answer,
+                    )
                 )
             except Exception as e:
                 logger.error(f"[GEMINI SUPERVISOR] Async breakdown failed: {e}")
@@ -483,7 +527,14 @@ def gemini_supervisor_breakdown(query: str, use_rag: bool, use_web_search: bool,
             logger.error("[GEMINI SUPERVISOR] Nested breakdown execution failed: nest_asyncio not available")
             raise RuntimeError("nest_asyncio not available")
         return loop.run_until_complete(
-            gemini_supervisor_breakdown_async(
+            gemini_supervisor_breakdown_async(
+                query,
+                use_rag,
+                use_web_search,
+                time_elapsed,
+                max_duration,
+                previous_answer,
+            )
         )
     except Exception as exc:
         logger.error(f"[GEMINI SUPERVISOR] Breakdown request failed: {type(exc).__name__}: {exc}")