LiamKhoaLe committed on
Commit 52b4ed7 · 1 Parent(s): b720259

Refactor app.py into modular files for better scalability

Files changed (13)
  1. app.py +0 -0
  2. config.py +148 -0
  3. indexing.py +236 -0
  4. logger.py +49 -0
  5. mcp.py +194 -0
  6. models.py +77 -0
  7. pipeline.py +438 -0
  8. reasoning.py +178 -0
  9. search.py +252 -0
  10. supervisor.py +717 -0
  11. ui.py +293 -0
  12. utils.py +109 -0
  13. voice.py +165 -0
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
config.py ADDED
@@ -0,0 +1,148 @@
+ """Configuration constants and global storage"""
+ import os
+ import threading
+
+ # Model configurations
+ MEDSWIN_MODELS = {
+     "MedSwin SFT": "MedSwin/MedSwin-7B-SFT",
+     "MedSwin KD": "MedSwin/MedSwin-7B-KD",
+     "MedSwin TA": "MedSwin/MedSwin-Merged-TA-SFT-0.7"
+ }
+ DEFAULT_MEDICAL_MODEL = "MedSwin TA"
+ EMBEDDING_MODEL = "abhinand/MedEmbed-large-v0.1"
+ TTS_MODEL = "maya-research/maya1"
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+ if not HF_TOKEN:
+     raise ValueError("HF_TOKEN not found in environment variables")
+
+ # Gemini MCP configuration
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+ GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")
+ GEMINI_MODEL_LITE = os.environ.get("GEMINI_MODEL_LITE", "gemini-2.5-flash-lite")
+
+ # MCP server configuration
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ agent_path = os.path.join(script_dir, "agent.py")
+ MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "python")
+ MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS", agent_path).split() if os.environ.get("MCP_SERVER_ARGS") else [agent_path]
+ MCP_TOOLS_CACHE_TTL = int(os.environ.get("MCP_TOOLS_CACHE_TTL", "60"))
+
+ # Global model storage
+ global_medical_models = {}
+ global_medical_tokenizers = {}
+ global_file_info = {}
+ global_tts_model = None
+ global_embed_model = None
+
+ # MCP client storage
+ global_mcp_session = None
+ global_mcp_stdio_ctx = None
+ global_mcp_lock = threading.Lock()
+ global_mcp_tools_cache = {"timestamp": 0.0, "tools": None}
+
+ # UI constants
+ TITLE = "<h1><center>🩺 MedLLM Agent - Medical RAG & Web Search System</center></h1>"
+ DESCRIPTION = """
+ <center>
+ <p><strong>Advanced Medical AI Assistant</strong> powered by MedSwin models</p>
+ <p>📄 <strong>Document RAG:</strong> Answer based on uploaded medical documents</p>
+ <p>🌐 <strong>Web Search:</strong> Fetch knowledge from reliable online medical resources</p>
+ <p>🌍 <strong>Multi-language:</strong> Automatic translation for non-English queries</p>
+ <p>Upload PDF or text files to get started!</p>
+ </center>
+ """
+ CSS = """
+ .upload-section {
+     max-width: 400px;
+     margin: 0 auto;
+     padding: 10px;
+     border: 2px dashed #ccc;
+     border-radius: 10px;
+ }
+ .upload-button {
+     background: #34c759 !important;
+     color: white !important;
+     border-radius: 25px !important;
+ }
+ .chatbot-container {
+     margin-top: 20px;
+ }
+ .status-output {
+     margin-top: 10px;
+     font-size: 14px;
+ }
+ .processing-info {
+     margin-top: 5px;
+     font-size: 12px;
+     color: #666;
+ }
+ .info-container {
+     margin-top: 10px;
+     padding: 10px;
+     border-radius: 5px;
+ }
+ .file-list {
+     margin-top: 0;
+     max-height: 200px;
+     overflow-y: auto;
+     padding: 5px;
+     border: 1px solid #eee;
+     border-radius: 5px;
+ }
+ .stats-box {
+     margin-top: 10px;
+     padding: 10px;
+     border-radius: 5px;
+     font-size: 12px;
+ }
+ .submit-btn {
+     background: #1a73e8 !important;
+     color: white !important;
+     border-radius: 25px !important;
+     margin-left: 10px;
+     padding: 5px 10px;
+     font-size: 16px;
+ }
+ .input-row {
+     display: flex;
+     align-items: center;
+ }
+ .recording-timer {
+     font-size: 12px;
+     color: #666;
+     text-align: center;
+     margin-top: 5px;
+ }
+ .feature-badge {
+     display: inline-block;
+     padding: 3px 8px;
+     margin: 2px;
+     border-radius: 12px;
+     font-size: 11px;
+     font-weight: bold;
+ }
+ .badge-rag {
+     background: #e3f2fd;
+     color: #1976d2;
+ }
+ .badge-web {
+     background: #f3e5f5;
+     color: #7b1fa2;
+ }
+ @media (min-width: 768px) {
+     .main-container {
+         display: flex;
+         justify-content: space-between;
+         gap: 20px;
+     }
+     .upload-section {
+         flex: 1;
+         max-width: 300px;
+     }
+     .chatbot-container {
+         flex: 2;
+         margin-top: 0;
+     }
+ }
+ """
+
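config.py doubles as a process-wide registry: the other modules import it and mutate the global_* attributes in place. A minimal sketch of that pattern, mirroring how models.py uses it (the assertion is illustrative only, not part of this commit):

    import config
    from models import get_or_create_embed_model

    # First call loads the MedEmbed weights and caches them on the config module;
    # later calls (indexing, RAG retrieval) reuse the same instance.
    embed_model = get_or_create_embed_model()
    assert config.global_embed_model is embed_model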
indexing.py ADDED
@@ -0,0 +1,236 @@
+ """Document parsing and indexing functions"""
+ import os
+ import base64
+ import asyncio
+ import tempfile
+ import time
+ import gradio as gr
+ import spaces
+ from llama_index.core import (
+     StorageContext,
+     VectorStoreIndex,
+     load_index_from_storage,
+     Document as LlamaDocument,
+ )
+ from llama_index.core import Settings
+ from llama_index.core.node_parser import (
+     HierarchicalNodeParser,
+     get_leaf_nodes,
+     get_root_nodes,
+ )
+ from llama_index.core.storage.docstore import SimpleDocumentStore
+ from tqdm import tqdm
+ from logger import logger
+ from mcp import MCP_AVAILABLE, call_agent
+ import config
+ from models import get_llm_for_rag, get_or_create_embed_model
+
+ try:
+     import nest_asyncio
+ except ImportError:
+     nest_asyncio = None
+
+
+ async def parse_document_gemini(file_path: str, file_extension: str) -> str:
+     """Parse document using Gemini MCP"""
+     if not MCP_AVAILABLE:
+         return ""
+
+     try:
+         with open(file_path, 'rb') as f:
+             file_content = base64.b64encode(f.read()).decode('utf-8')
+
+         mime_type_map = {
+             '.pdf': 'application/pdf',
+             '.doc': 'application/msword',
+             '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+             '.txt': 'text/plain',
+             '.md': 'text/markdown',
+             '.json': 'application/json',
+             '.xml': 'application/xml',
+             '.csv': 'text/csv'
+         }
+         mime_type = mime_type_map.get(file_extension, 'application/octet-stream')
+
+         files = [{
+             "content": file_content,
+             "type": mime_type
+         }]
+
+         system_prompt = "Extract all text content from the document accurately."
+         user_prompt = "Extract all text content from this document. Return only the extracted text, preserving structure and formatting where possible."
+
+         result = await call_agent(
+             user_prompt=user_prompt,
+             system_prompt=system_prompt,
+             files=files,
+             model=config.GEMINI_MODEL_LITE,
+             temperature=0.2
+         )
+
+         return result.strip()
+     except Exception as e:
+         logger.error(f"Gemini document parsing error: {e}")
+         return ""
+
+
+ def extract_text_from_document(file):
+     """Extract text from document using Gemini MCP"""
+     file_name = file.name
+     file_extension = os.path.splitext(file_name)[1].lower()
+
+     if file_extension == '.txt':
+         text = file.read().decode('utf-8')
+         return text, len(text.split()), None
+
+     try:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
+             file.seek(0)
+             tmp_file.write(file.read())
+             tmp_file_path = tmp_file.name
+
+         if MCP_AVAILABLE:
+             try:
+                 loop = asyncio.get_event_loop()
+                 if loop.is_running():
+                     if nest_asyncio:
+                         # nest_asyncio exposes apply(), not run(); patch the loop, then re-enter it
+                         nest_asyncio.apply(loop)
+                         text = loop.run_until_complete(parse_document_gemini(tmp_file_path, file_extension))
+                     else:
+                         logger.error("Error in nested async document parsing: nest_asyncio not available")
+                         text = ""
+                 else:
+                     text = loop.run_until_complete(parse_document_gemini(tmp_file_path, file_extension))
+
+                 try:
+                     os.unlink(tmp_file_path)
+                 except:
+                     pass
+
+                 if text:
+                     return text, len(text.split()), None
+                 else:
+                     return None, 0, ValueError(f"Failed to extract text from {file_extension} file using Gemini MCP")
+             except Exception as e:
+                 logger.error(f"Gemini MCP document parsing error: {e}")
+                 try:
+                     os.unlink(tmp_file_path)
+                 except:
+                     pass
+                 return None, 0, ValueError(f"Error parsing {file_extension} file: {str(e)}")
+         else:
+             try:
+                 os.unlink(tmp_file_path)
+             except:
+                 pass
+             return None, 0, ValueError(f"Gemini MCP not available. Cannot parse {file_extension} files.")
+     except Exception as e:
+         logger.error(f"Error processing document: {e}")
+         return None, 0, ValueError(f"Error processing {file_extension} file: {str(e)}")
+
+
+ @spaces.GPU(max_duration=120)
+ def create_or_update_index(files, request: gr.Request):
+     """Create or update RAG index from uploaded files"""
+     if not files:
+         return "Please provide files.", ""
+
+     start_time = time.time()
+     user_id = request.session_hash
+     save_dir = f"./{user_id}_index"
+
+     llm = get_llm_for_rag()
+     embed_model = get_or_create_embed_model()
+     Settings.llm = llm
+     Settings.embed_model = embed_model
+     file_stats = []
+     new_documents = []
+
+     for file in tqdm(files, desc="Processing files"):
+         file_basename = os.path.basename(file.name)
+         text, word_count, error = extract_text_from_document(file)
+         if error:
+             logger.error(f"Error processing file {file_basename}: {str(error)}")
+             file_stats.append({
+                 "name": file_basename,
+                 "words": 0,
+                 "status": f"error: {str(error)}"
+             })
+             continue
+
+         doc = LlamaDocument(
+             text=text,
+             metadata={
+                 "file_name": file_basename,
+                 "word_count": word_count,
+                 "source": "user_upload"
+             }
+         )
+         new_documents.append(doc)
+
+         file_stats.append({
+             "name": file_basename,
+             "words": word_count,
+             "status": "processed"
+         })
+
+         config.global_file_info[file_basename] = {
+             "word_count": word_count,
+             "processed_at": time.time()
+         }
+
+     node_parser = HierarchicalNodeParser.from_defaults(
+         chunk_sizes=[2048, 512, 128],
+         chunk_overlap=20
+     )
+     logger.info(f"Parsing {len(new_documents)} documents into hierarchical nodes")
+     new_nodes = node_parser.get_nodes_from_documents(new_documents)
+     new_leaf_nodes = get_leaf_nodes(new_nodes)
+     new_root_nodes = get_root_nodes(new_nodes)
+     logger.info(f"Generated {len(new_nodes)} total nodes ({len(new_root_nodes)} root, {len(new_leaf_nodes)} leaf)")
+
+     if os.path.exists(save_dir):
+         logger.info(f"Loading existing index from {save_dir}")
+         storage_context = StorageContext.from_defaults(persist_dir=save_dir)
+         index = load_index_from_storage(storage_context, settings=Settings)
+         docstore = storage_context.docstore
+
+         docstore.add_documents(new_nodes)
+         for node in tqdm(new_leaf_nodes, desc="Adding leaf nodes to index"):
+             index.insert_nodes([node])
+
+         total_docs = len(docstore.docs)
+         logger.info(f"Updated index with {len(new_nodes)} new nodes from {len(new_documents)} files")
+     else:
+         logger.info("Creating new index")
+         docstore = SimpleDocumentStore()
+         storage_context = StorageContext.from_defaults(docstore=docstore)
+         docstore.add_documents(new_nodes)
+
+         index = VectorStoreIndex(
+             new_leaf_nodes,
+             storage_context=storage_context,
+             settings=Settings
+         )
+         total_docs = len(new_documents)
+         logger.info(f"Created new index with {len(new_nodes)} nodes from {len(new_documents)} files")
+
+     index.storage_context.persist(persist_dir=save_dir)
+
+     file_list_html = "<div class='file-list'>"
+     for stat in file_stats:
+         status_color = "#4CAF50" if stat["status"] == "processed" else "#f44336"
+         file_list_html += f"<div><span style='color:{status_color}'>●</span> {stat['name']} - {stat['words']} words</div>"
+     file_list_html += "</div>"
+     processing_time = time.time() - start_time
+     stats_output = f"<div class='stats-box'>"
+     stats_output += f"✓ Processed {len(files)} files in {processing_time:.2f} seconds<br>"
+     stats_output += f"✓ Created {len(new_nodes)} nodes ({len(new_leaf_nodes)} leaf nodes)<br>"
+     stats_output += f"✓ Total documents in index: {total_docs}<br>"
+     stats_output += f"✓ Index saved to: {save_dir}<br>"
+     stats_output += "</div>"
+     output_container = f"<div class='info-container'>"
+     output_container += file_list_html
+     output_container += stats_output
+     output_container += "</div>"
+     return f"Successfully indexed {len(files)} files.", output_container
+
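create_or_update_index is designed to be bound directly to a Gradio event; the gr.Request parameter is injected by Gradio because the handler annotates it. The actual wiring lives in ui.py, which is not rendered here, so the sketch below is illustrative only and the component names are assumptions:

    import gradio as gr
    from indexing import create_or_update_index

    with gr.Blocks() as demo:
        files = gr.File(file_count="multiple", label="Upload medical documents")
        build_btn = gr.Button("Build index")
        status = gr.Textbox(label="Status")
        details = gr.HTML()
        # gr.Request is supplied automatically at call time
        build_btn.click(create_or_update_index, inputs=[files], outputs=[status, details])

    demo.launch()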
logger.py ADDED
@@ -0,0 +1,49 @@
+ """Logging configuration and custom handlers"""
+ import logging
+ import threading
+ from transformers import logging as hf_logging
+
+ # Set logging to INFO level for cleaner output
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ # Custom logger handler to capture agentic thoughts
+ class ThoughtCaptureHandler(logging.Handler):
+     """Custom handler to capture internal thoughts from MedSwin and supervisor"""
+     def __init__(self):
+         super().__init__()
+         self.thoughts = []
+         self.lock = threading.Lock()
+
+     def emit(self, record):
+         """Capture log messages that contain agentic thoughts"""
+         try:
+             msg = self.format(record)
+             # Only capture messages from GEMINI SUPERVISOR or MEDSWIN
+             if "[GEMINI SUPERVISOR]" in msg or "[MEDSWIN]" in msg or "[MAC]" in msg:
+                 # Remove timestamp and logger name for cleaner display
+                 parts = msg.split(" - ", 3)
+                 if len(parts) >= 4:
+                     clean_msg = parts[-1]
+                 else:
+                     clean_msg = msg
+                 with self.lock:
+                     self.thoughts.append(clean_msg)
+         except Exception:
+             pass
+
+     def get_thoughts(self):
+         """Get all captured thoughts as a formatted string"""
+         with self.lock:
+             return "\n".join(self.thoughts)
+
+     def clear(self):
+         """Clear captured thoughts"""
+         with self.lock:
+             self.thoughts = []
+
+ # Set MCP client logging to WARNING to reduce noise
+ mcp_client_logger = logging.getLogger("mcp.client")
+ mcp_client_logger.setLevel(logging.WARNING)
+ hf_logging.set_verbosity_error()
+
+
mcp.py ADDED
@@ -0,0 +1,194 @@
+ """MCP session management and tool caching"""
+ import os
+ import time
+ import asyncio
+ from logger import logger
+ import config
+
+ # MCP imports
+ MCP_CLIENT_INFO = None
+ try:
+     from mcp import ClientSession, StdioServerParameters
+     from mcp import types as mcp_types
+     from mcp.client.stdio import stdio_client
+     try:
+         import nest_asyncio
+         nest_asyncio.apply()
+     except ImportError:
+         pass
+     MCP_AVAILABLE = True
+     MCP_CLIENT_INFO = mcp_types.Implementation(
+         name="MedLLM-Agent",
+         version=os.environ.get("SPACE_VERSION", "local"),
+     )
+ except ImportError as e:
+     logger.warning(f"MCP SDK not available: {e}")
+     MCP_AVAILABLE = False
+     MCP_CLIENT_INFO = None
+
+
+ async def get_mcp_session():
+     """Get or create MCP client session with proper context management"""
+     if not MCP_AVAILABLE:
+         logger.warning("MCP not available - SDK not installed")
+         return None
+
+     if config.global_mcp_session is not None:
+         return config.global_mcp_session
+
+     try:
+         mcp_env = os.environ.copy()
+         if config.GEMINI_API_KEY:
+             mcp_env["GEMINI_API_KEY"] = config.GEMINI_API_KEY
+         else:
+             logger.warning("GEMINI_API_KEY not set in environment. Gemini MCP features may not work.")
+
+         if os.environ.get("GEMINI_MODEL"):
+             mcp_env["GEMINI_MODEL"] = os.environ.get("GEMINI_MODEL")
+         if os.environ.get("GEMINI_TIMEOUT"):
+             mcp_env["GEMINI_TIMEOUT"] = os.environ.get("GEMINI_TIMEOUT")
+         if os.environ.get("GEMINI_MAX_OUTPUT_TOKENS"):
+             mcp_env["GEMINI_MAX_OUTPUT_TOKENS"] = os.environ.get("GEMINI_MAX_OUTPUT_TOKENS")
+         if os.environ.get("GEMINI_TEMPERATURE"):
+             mcp_env["GEMINI_TEMPERATURE"] = os.environ.get("GEMINI_TEMPERATURE")
+
+         logger.info("Creating MCP client session...")
+
+         server_params = StdioServerParameters(
+             command=config.MCP_SERVER_COMMAND,
+             args=config.MCP_SERVER_ARGS,
+             env=mcp_env
+         )
+
+         stdio_ctx = stdio_client(server_params)
+         read, write = await stdio_ctx.__aenter__()
+
+         session = ClientSession(
+             read,
+             write,
+             client_info=MCP_CLIENT_INFO,
+         )
+
+         try:
+             await session.__aenter__()
+             init_result = await session.initialize()
+             server_info = getattr(init_result, "serverInfo", None)
+             server_name = getattr(server_info, "name", "unknown")
+             server_version = getattr(server_info, "version", "unknown")
+             logger.info(f"✅ MCP session initialized (server={server_name} v{server_version})")
+         except Exception as e:
+             error_msg = str(e)
+             error_type = type(e).__name__
+             logger.error(f"❌ MCP session initialization failed: {error_type}: {error_msg}")
+             try:
+                 await session.__aexit__(None, None, None)
+             except Exception:
+                 pass
+             try:
+                 await stdio_ctx.__aexit__(None, None, None)
+             except Exception:
+                 pass
+             return None
+
+         config.global_mcp_session = session
+         config.global_mcp_stdio_ctx = stdio_ctx
+         logger.info("✅ MCP client session created successfully")
+         return session
+     except Exception as e:
+         error_type = type(e).__name__
+         error_msg = str(e)
+         logger.error(f"❌ Failed to create MCP client session: {error_type}: {error_msg}")
+         config.global_mcp_session = None
+         config.global_mcp_stdio_ctx = None
+         return None
+
+
+ def invalidate_mcp_tools_cache():
+     """Invalidate cached MCP tool metadata"""
+     config.global_mcp_tools_cache = {"timestamp": 0.0, "tools": None}
+
+
+ async def get_cached_mcp_tools(force_refresh: bool = False):
+     """Return cached MCP tools list to avoid repeated list_tools calls"""
+     if not MCP_AVAILABLE:
+         return []
+
+     now = time.time()
+     if (
+         not force_refresh
+         and config.global_mcp_tools_cache["tools"]
+         and now - config.global_mcp_tools_cache["timestamp"] < config.MCP_TOOLS_CACHE_TTL
+     ):
+         return config.global_mcp_tools_cache["tools"]
+
+     session = await get_mcp_session()
+     if session is None:
+         return []
+
+     try:
+         tools_resp = await session.list_tools()
+         tools_list = list(getattr(tools_resp, "tools", []) or [])
+         config.global_mcp_tools_cache = {"timestamp": now, "tools": tools_list}
+         return tools_list
+     except Exception as e:
+         logger.error(f"Failed to refresh MCP tools: {e}")
+         invalidate_mcp_tools_cache()
+         return []
+
+
+ async def call_agent(user_prompt: str, system_prompt: str = None, files: list = None, model: str = None, temperature: float = 0.2) -> str:
+     """Call Gemini MCP generate_content tool"""
+     if not MCP_AVAILABLE:
+         logger.warning("MCP not available for Gemini call")
+         return ""
+
+     try:
+         session = await get_mcp_session()
+         if session is None:
+             logger.warning("Failed to get MCP session for Gemini call")
+             return ""
+
+         tools = await get_cached_mcp_tools()
+         if not tools:
+             tools = await get_cached_mcp_tools(force_refresh=True)
+         if not tools:
+             logger.error("Unable to obtain MCP tool catalog for Gemini calls")
+             return ""
+
+         generate_tool = None
+         for tool in tools:
+             if tool.name == "generate_content" or "generate_content" in tool.name.lower():
+                 generate_tool = tool
+                 logger.info(f"Found Gemini MCP tool: {tool.name}")
+                 break
+
+         if not generate_tool:
+             logger.warning(f"Gemini MCP generate_content tool not found. Available tools: {[t.name for t in tools]}")
+             invalidate_mcp_tools_cache()
+             return ""
+
+         arguments = {
+             "user_prompt": user_prompt
+         }
+         if system_prompt:
+             arguments["system_prompt"] = system_prompt
+         if files:
+             arguments["files"] = files
+         if model:
+             arguments["model"] = model
+         if temperature is not None:
+             arguments["temperature"] = temperature
+
+         result = await session.call_tool(generate_tool.name, arguments=arguments)
+
+         if hasattr(result, 'content') and result.content:
+             for item in result.content:
+                 if hasattr(item, 'text'):
+                     response_text = item.text.strip()
+                     return response_text
+         logger.warning("⚠️ Gemini MCP returned empty or invalid result")
+         return ""
+     except Exception as e:
+         logger.error(f"Gemini MCP call error: {e}")
+         return ""
+
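call_agent is async and returns an empty string on any failure, so callers can degrade gracefully. A minimal sketch of driving it from synchronous code (the prompt text is a placeholder, not from this repo):

    import asyncio
    from mcp import call_agent  # the module added above

    answer = asyncio.run(call_agent(
        user_prompt="Summarize the key contraindications of metformin in two bullet points.",
        system_prompt="You are a concise medical assistant.",
        temperature=0.2,
    ))
    print(answer or "Gemini MCP unavailable or returned nothing")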
models.py ADDED
@@ -0,0 +1,77 @@
+ """Model initialization and management"""
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from llama_index.llms.huggingface import HuggingFaceLLM
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from logger import logger
+ import config
+
+ try:
+     from TTS.api import TTS
+     TTS_AVAILABLE = True
+ except ImportError:
+     TTS_AVAILABLE = False
+     TTS = None
+
+
+ def initialize_medical_model(model_name: str):
+     """Initialize medical model (MedSwin) - download on demand"""
+     if model_name not in config.global_medical_models or config.global_medical_models[model_name] is None:
+         logger.info(f"Initializing medical model: {model_name}...")
+         model_path = config.MEDSWIN_MODELS[model_name]
+         tokenizer = AutoTokenizer.from_pretrained(model_path, token=config.HF_TOKEN)
+         model = AutoModelForCausalLM.from_pretrained(
+             model_path,
+             device_map="auto",
+             trust_remote_code=True,
+             token=config.HF_TOKEN,
+             torch_dtype=torch.float16
+         )
+         config.global_medical_models[model_name] = model
+         config.global_medical_tokenizers[model_name] = tokenizer
+         logger.info(f"Medical model {model_name} initialized successfully")
+     return config.global_medical_models[model_name], config.global_medical_tokenizers[model_name]
+
+
+ def initialize_tts_model():
+     """Initialize TTS model for text-to-speech"""
+     if not TTS_AVAILABLE:
+         logger.warning("TTS library not installed. TTS features will be disabled.")
+         return None
+     if config.global_tts_model is None:
+         try:
+             logger.info("Initializing TTS model for voice generation...")
+             config.global_tts_model = TTS(model_name=config.TTS_MODEL, progress_bar=False)
+             logger.info("TTS model initialized successfully")
+         except Exception as e:
+             logger.warning(f"TTS model initialization failed: {e}")
+             logger.warning("TTS features will be disabled. If pyworld dependency is missing, try: pip install TTS --no-deps && pip install coqui-tts")
+             config.global_tts_model = None
+     return config.global_tts_model
+
+
+ def get_or_create_embed_model():
+     """Reuse embedding model to avoid reloading weights each request"""
+     if config.global_embed_model is None:
+         logger.info("Initializing shared embedding model for RAG retrieval...")
+         config.global_embed_model = HuggingFaceEmbedding(model_name=config.EMBEDDING_MODEL, token=config.HF_TOKEN)
+     return config.global_embed_model
+
+
+ def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
+     """Get LLM for RAG indexing (uses medical model)"""
+     medical_model_obj, medical_tokenizer = initialize_medical_model(config.DEFAULT_MEDICAL_MODEL)
+
+     return HuggingFaceLLM(
+         context_window=4096,
+         max_new_tokens=max_new_tokens,
+         tokenizer=medical_tokenizer,
+         model=medical_model_obj,
+         generate_kwargs={
+             "do_sample": True,
+             "temperature": temperature,
+             "top_k": top_k,
+             "top_p": top_p
+         }
+     )
+
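These factories are combined by setting the llama_index globals before indexing or retrieval, exactly as indexing.py and pipeline.py do; a condensed sketch:

    from llama_index.core import Settings
    from models import get_llm_for_rag, get_or_create_embed_model

    # Both objects are cached on the config module, so repeated calls are cheap.
    Settings.llm = get_llm_for_rag(max_new_tokens=256)
    Settings.embed_model = get_or_create_embed_model()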
pipeline.py ADDED
@@ -0,0 +1,438 @@
+ """Main chat pipeline - stream_chat function"""
+ import os
+ import json
+ import time
+ import logging
+ import concurrent.futures
+ import gradio as gr
+ import spaces
+ from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
+ from llama_index.core import Settings
+ from llama_index.core.retrievers import AutoMergingRetriever
+ from logger import logger, ThoughtCaptureHandler
+ from models import initialize_medical_model, get_or_create_embed_model
+ from utils import detect_language, translate_text, format_url_as_domain
+ from search import search_web, summarize_web_content
+ from reasoning import autonomous_reasoning, create_execution_plan, autonomous_execution_strategy
+ from supervisor import (
+     gemini_supervisor_breakdown, gemini_supervisor_search_strategies,
+     gemini_supervisor_rag_brainstorm, execute_medswin_task,
+     gemini_supervisor_synthesize, gemini_supervisor_challenge,
+     gemini_supervisor_enhance_answer, gemini_supervisor_check_clarity
+ )
+
+
+ @spaces.GPU(max_duration=120)
+ def stream_chat(
+     message: str,
+     history: list,
+     system_prompt: str,
+     temperature: float,
+     max_new_tokens: int,
+     top_p: float,
+     top_k: int,
+     penalty: float,
+     retriever_k: int,
+     merge_threshold: float,
+     use_rag: bool,
+     medical_model: str,
+     use_web_search: bool,
+     disable_agentic_reasoning: bool,
+     show_thoughts: bool,
+     request: gr.Request
+ ):
+     """Main chat pipeline implementing MAC architecture"""
+     if not request:
+         yield history + [{"role": "assistant", "content": "Session initialization failed. Please refresh the page."}], ""
+         return
+
+     thought_handler = None
+     if show_thoughts:
+         thought_handler = ThoughtCaptureHandler()
+         thought_handler.setLevel(logging.INFO)
+         thought_handler.clear()
+         logger.addHandler(thought_handler)
+
+     session_start = time.time()
+     soft_timeout = 100
+     hard_timeout = 118
+
+     def elapsed():
+         return time.time() - session_start
+
+     user_id = request.session_hash
+     index_dir = f"./{user_id}_index"
+     has_rag_index = os.path.exists(index_dir)
+
+     original_lang = detect_language(message)
+     original_message = message
+     needs_translation = original_lang != "en"
+
+     pipeline_diagnostics = {
+         "reasoning": None,
+         "plan": None,
+         "strategy_decisions": [],
+         "stage_metrics": {},
+         "search": {"strategies": [], "total_results": 0}
+     }
+
+     def record_stage(stage_name: str, start_time: float):
+         pipeline_diagnostics["stage_metrics"][stage_name] = round(time.time() - start_time, 3)
+
+     translation_stage_start = time.time()
+     if needs_translation:
+         logger.info(f"[GEMINI SUPERVISOR] Detected non-English language: {original_lang}, translating...")
+         message = translate_text(message, target_lang="en", source_lang=original_lang)
+         logger.info(f"[GEMINI SUPERVISOR] Translated query: {message[:100]}...")
+     record_stage("translation", translation_stage_start)
+
+     final_use_rag = use_rag and has_rag_index and not disable_agentic_reasoning
+     final_use_web_search = use_web_search and not disable_agentic_reasoning
+
+     plan = None
+     if not disable_agentic_reasoning:
+         reasoning_stage_start = time.time()
+         reasoning = autonomous_reasoning(message, history)
+         record_stage("autonomous_reasoning", reasoning_stage_start)
+         pipeline_diagnostics["reasoning"] = reasoning
+         plan = create_execution_plan(reasoning, message, has_rag_index)
+         pipeline_diagnostics["plan"] = plan
+         execution_strategy = autonomous_execution_strategy(
+             reasoning, plan, final_use_rag, final_use_web_search, has_rag_index
+         )
+
+         if final_use_rag and not reasoning.get("requires_rag", True):
+             final_use_rag = False
+             pipeline_diagnostics["strategy_decisions"].append("Skipped RAG per autonomous reasoning")
+         elif not final_use_rag and reasoning.get("requires_rag", True) and not has_rag_index:
+             pipeline_diagnostics["strategy_decisions"].append("Reasoning wanted RAG but no index available")
+
+         if final_use_web_search and not reasoning.get("requires_web_search", False):
+             final_use_web_search = False
+             pipeline_diagnostics["strategy_decisions"].append("Skipped web search per autonomous reasoning")
+         elif not final_use_web_search and reasoning.get("requires_web_search", False):
+             if not use_web_search:
+                 pipeline_diagnostics["strategy_decisions"].append("User disabled web search despite reasoning request")
+             else:
+                 pipeline_diagnostics["strategy_decisions"].append("Web search requested by reasoning but disabled by mode")
+     else:
+         pipeline_diagnostics["strategy_decisions"].append("Agentic reasoning disabled by user")
+
+     if disable_agentic_reasoning:
+         logger.info("[MAC] Agentic reasoning disabled - using MedSwin alone")
+         breakdown = {
+             "sub_topics": [
+                 {"id": 1, "topic": "Answer", "instruction": message, "expected_tokens": 400, "priority": "high", "approach": "direct answer"}
+             ],
+             "strategy": "Direct answer",
+             "exploration_note": "Direct mode - no breakdown"
+         }
+     else:
+         logger.info("[GEMINI SUPERVISOR] Breaking query into sub-topics...")
+         breakdown = gemini_supervisor_breakdown(message, final_use_rag, final_use_web_search, elapsed(), max_duration=120)
+         logger.info(f"[GEMINI SUPERVISOR] Created {len(breakdown.get('sub_topics', []))} sub-topics")
+
+     search_contexts = []
+     web_urls = []
+     if final_use_web_search:
+         search_stage_start = time.time()
+         logger.info("[GEMINI SUPERVISOR] Search mode: Creating search strategies...")
+         search_strategies = gemini_supervisor_search_strategies(message, elapsed())
+
+         all_search_results = []
+         strategy_jobs = []
+         for strategy in search_strategies.get("search_strategies", [])[:4]:
+             search_query = strategy.get("strategy", message)
+             target_sources = strategy.get("target_sources", 2)
+             strategy_jobs.append({
+                 "query": search_query,
+                 "target_sources": target_sources,
+                 "meta": strategy
+             })
+
+         def execute_search(job):
+             job_start = time.time()
+             try:
+                 results = search_web(job["query"], max_results=job["target_sources"])
+                 duration = time.time() - job_start
+                 return results, duration, None
+             except Exception as exc:
+                 return [], time.time() - job_start, exc
+
+         def record_search_diag(job, duration, results_count, error=None):
+             entry = {
+                 "query": job["query"],
+                 "target_sources": job["target_sources"],
+                 "duration": round(duration, 3),
+                 "results": results_count
+             }
+             if error:
+                 entry["error"] = str(error)
+             pipeline_diagnostics["search"]["strategies"].append(entry)
+
+         if strategy_jobs:
+             max_workers = min(len(strategy_jobs), 4)
+             if len(strategy_jobs) > 1:
+                 with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                     future_map = {executor.submit(execute_search, job): job for job in strategy_jobs}
+                     for future in concurrent.futures.as_completed(future_map):
+                         job = future_map[future]
+                         try:
+                             results, duration, error = future.result()
+                         except Exception as exc:
+                             results, duration, error = [], 0.0, exc
+                         record_search_diag(job, duration, len(results), error)
+                         if not error and results:
+                             all_search_results.extend(results)
+                             web_urls.extend([r.get('url', '') for r in results if r.get('url')])
+             else:
+                 job = strategy_jobs[0]
+                 results, duration, error = execute_search(job)
+                 record_search_diag(job, duration, len(results), error)
+                 if not error and results:
+                     all_search_results.extend(results)
+                     web_urls.extend([r.get('url', '') for r in results if r.get('url')])
+         else:
+             pipeline_diagnostics["strategy_decisions"].append("No viable web search strategies returned")
+
+         pipeline_diagnostics["search"]["total_results"] = len(all_search_results)
+
+         if all_search_results:
+             logger.info(f"[GEMINI SUPERVISOR] Summarizing {len(all_search_results)} search results...")
+             search_summary = summarize_web_content(all_search_results, message)
+             if search_summary:
+                 search_contexts.append(search_summary)
+                 logger.info(f"[GEMINI SUPERVISOR] Search summary created: {len(search_summary)} chars")
+         record_stage("web_search", search_stage_start)
+
+     rag_contexts = []
+     if final_use_rag and has_rag_index:
+         rag_stage_start = time.time()
+         if elapsed() >= soft_timeout - 10:
+             logger.warning("[GEMINI SUPERVISOR] Skipping RAG due to time pressure")
+             final_use_rag = False
+         else:
+             logger.info("[GEMINI SUPERVISOR] RAG mode: Retrieving documents...")
+             embed_model = get_or_create_embed_model()
+             Settings.embed_model = embed_model
+             storage_context = StorageContext.from_defaults(persist_dir=index_dir)
+             index = load_index_from_storage(storage_context, settings=Settings)
+             base_retriever = index.as_retriever(similarity_top_k=retriever_k)
+             auto_merging_retriever = AutoMergingRetriever(
+                 base_retriever,
+                 storage_context=storage_context,
+                 simple_ratio_thresh=merge_threshold,
+                 verbose=False
+             )
+             merged_nodes = auto_merging_retriever.retrieve(message)
+             retrieved_docs = "\n\n".join([n.node.text for n in merged_nodes])
+             logger.info(f"[GEMINI SUPERVISOR] Retrieved {len(merged_nodes)} document nodes")
+
+             logger.info("[GEMINI SUPERVISOR] Brainstorming RAG contexts...")
+             rag_brainstorm = gemini_supervisor_rag_brainstorm(message, retrieved_docs, elapsed())
+             rag_contexts = [ctx.get("context", "") for ctx in rag_brainstorm.get("contexts", [])]
+             logger.info(f"[GEMINI SUPERVISOR] Created {len(rag_contexts)} RAG contexts")
+         record_stage("rag_retrieval", rag_stage_start)
+
+     medical_model_obj, medical_tokenizer = initialize_medical_model(medical_model)
+
+     base_system_prompt = system_prompt if system_prompt else "As a medical specialist, provide clinical and concise answers. Use Markdown format with bullet points. Do not use tables."
+
+     combined_context = ""
+     if rag_contexts:
+         combined_context += "Document Context:\n" + "\n\n".join(rag_contexts[:4])
+     if search_contexts:
+         if combined_context:
+             combined_context += "\n\n"
+         combined_context += "Web Search Context:\n" + "\n\n".join(search_contexts)
+
+     logger.info(f"[MEDSWIN] Executing {len(breakdown.get('sub_topics', []))} tasks sequentially...")
+     medswin_answers = []
+
+     updated_history = history + [
+         {"role": "user", "content": original_message},
+         {"role": "assistant", "content": ""}
+     ]
+     thoughts_text = thought_handler.get_thoughts() if thought_handler else ""
+     yield updated_history, thoughts_text
+
+     medswin_stage_start = time.time()
+     for idx, sub_topic in enumerate(breakdown.get("sub_topics", []), 1):
+         if elapsed() >= hard_timeout - 5:
+             logger.warning(f"[MEDSWIN] Time limit approaching, stopping at task {idx}")
+             break
+
+         task_instruction = sub_topic.get("instruction", "")
+         topic_name = sub_topic.get("topic", f"Topic {idx}")
+         priority = sub_topic.get("priority", "medium")
+
+         logger.info(f"[MEDSWIN] Executing task {idx}/{len(breakdown.get('sub_topics', []))}: {topic_name} (priority: {priority})")
+
+         task_context = combined_context
+         if len(rag_contexts) > 1 and idx <= len(rag_contexts):
+             task_context = rag_contexts[idx - 1] if idx <= len(rag_contexts) else combined_context
+
+         try:
+             task_answer = execute_medswin_task(
+                 medical_model_obj=medical_model_obj,
+                 medical_tokenizer=medical_tokenizer,
+                 task_instruction=task_instruction,
+                 context=task_context if task_context else "",
+                 system_prompt_base=base_system_prompt,
+                 temperature=temperature,
+                 max_new_tokens=min(max_new_tokens, 800),
+                 top_p=top_p,
+                 top_k=top_k,
+                 penalty=penalty
+             )
+
+             formatted_answer = f"## {topic_name}\n\n{task_answer}"
+             medswin_answers.append(formatted_answer)
+             logger.info(f"[MEDSWIN] Task {idx} completed: {len(task_answer)} chars")
+
+             partial_final = "\n\n".join(medswin_answers)
+             updated_history[-1]["content"] = partial_final
+             thoughts_text = thought_handler.get_thoughts() if thought_handler else ""
+             yield updated_history, thoughts_text
+
+         except Exception as e:
+             logger.error(f"[MEDSWIN] Task {idx} failed: {e}")
+             continue
+     record_stage("medswin_tasks", medswin_stage_start)
+
+     logger.info("[GEMINI SUPERVISOR] Synthesizing final answer from all MedSwin responses...")
+     raw_medswin_answers = [ans.split('\n\n', 1)[1] if '\n\n' in ans else ans for ans in medswin_answers]
+     synthesis_stage_start = time.time()
+     final_answer = gemini_supervisor_synthesize(message, raw_medswin_answers, rag_contexts, search_contexts, breakdown)
+     record_stage("synthesis", synthesis_stage_start)
+
+     if not final_answer or len(final_answer.strip()) < 50:
+         logger.warning("[GEMINI SUPERVISOR] Synthesis failed or too short, using concatenation")
+         final_answer = "\n\n".join(medswin_answers) if medswin_answers else "I apologize, but I was unable to generate a response."
+
+     if "|" in final_answer and "---" in final_answer:
+         logger.warning("[MEDSWIN] Final answer contains tables, converting to bullets")
+         lines = final_answer.split('\n')
+         cleaned_lines = []
+         for line in lines:
+             if '|' in line and '---' not in line:
+                 cells = [cell.strip() for cell in line.split('|') if cell.strip()]
+                 if cells:
+                     cleaned_lines.append(f"- {' / '.join(cells)}")
+             elif '---' not in line:
+                 cleaned_lines.append(line)
+         final_answer = '\n'.join(cleaned_lines)
+
+     max_challenge_iterations = 2
+     challenge_iteration = 0
+     challenge_stage_start = time.time()
+
+     while challenge_iteration < max_challenge_iterations and elapsed() < soft_timeout - 15:
+         challenge_iteration += 1
+         logger.info(f"[GEMINI SUPERVISOR] Challenge iteration {challenge_iteration}/{max_challenge_iterations}...")
+
+         evaluation = gemini_supervisor_challenge(message, final_answer, raw_medswin_answers, rag_contexts, search_contexts)
+
+         if evaluation.get("is_optimal", False):
+             logger.info(f"[GEMINI SUPERVISOR] Answer confirmed optimal after {challenge_iteration} iteration(s)")
+             break
+
+         enhancement_instructions = evaluation.get("enhancement_instructions", "")
+         if not enhancement_instructions:
+             logger.info("[GEMINI SUPERVISOR] No enhancement instructions, considering answer optimal")
+             break
+
+         logger.info(f"[GEMINI SUPERVISOR] Enhancing answer based on feedback...")
+         enhanced_answer = gemini_supervisor_enhance_answer(
+             message, final_answer, enhancement_instructions, raw_medswin_answers, rag_contexts, search_contexts
+         )
+
+         if enhanced_answer and len(enhanced_answer.strip()) > len(final_answer.strip()) * 0.8:
+             final_answer = enhanced_answer
+             logger.info(f"[GEMINI SUPERVISOR] Answer enhanced (new length: {len(final_answer)} chars)")
+         else:
+             logger.info("[GEMINI SUPERVISOR] Enhancement did not improve answer significantly, stopping")
+             break
+     record_stage("challenge_loop", challenge_stage_start)
+
+     if final_use_web_search and elapsed() < soft_timeout - 10:
+         logger.info("[GEMINI SUPERVISOR] Checking if additional search is needed...")
+         clarity_stage_start = time.time()
+         clarity_check = gemini_supervisor_check_clarity(message, final_answer, final_use_web_search)
+         record_stage("clarity_check", clarity_stage_start)
+
+         if clarity_check.get("needs_search", False) and clarity_check.get("search_queries"):
+             logger.info(f"[GEMINI SUPERVISOR] Triggering additional search: {clarity_check.get('search_queries', [])}")
+             additional_search_results = []
+             followup_stage_start = time.time()
+             for search_query in clarity_check.get("search_queries", [])[:3]:
+                 if elapsed() >= soft_timeout - 5:
+                     break
+                 extra_start = time.time()
+                 results = search_web(search_query, max_results=2)
+                 extra_duration = time.time() - extra_start
+                 pipeline_diagnostics["search"]["strategies"].append({
+                     "query": search_query,
+                     "target_sources": 2,
+                     "duration": round(extra_duration, 3),
+                     "results": len(results),
+                     "type": "followup"
+                 })
+                 additional_search_results.extend(results)
+                 web_urls.extend([r.get('url', '') for r in results if r.get('url')])
+
+             if additional_search_results:
+                 pipeline_diagnostics["search"]["total_results"] += len(additional_search_results)
+                 logger.info(f"[GEMINI SUPERVISOR] Summarizing {len(additional_search_results)} additional search results...")
+                 additional_summary = summarize_web_content(additional_search_results, message)
+                 if additional_summary:
+                     search_contexts.append(additional_summary)
+                     logger.info("[GEMINI SUPERVISOR] Enhancing answer with additional search context...")
+                     enhanced_with_search = gemini_supervisor_enhance_answer(
+                         message, final_answer,
+                         f"Incorporate the following additional information from web search: {additional_summary}",
+                         raw_medswin_answers, rag_contexts, search_contexts
+                     )
+                     if enhanced_with_search and len(enhanced_with_search.strip()) > 50:
+                         final_answer = enhanced_with_search
+                         logger.info("[GEMINI SUPERVISOR] Answer enhanced with additional search context")
+             record_stage("followup_search", followup_stage_start)
+
+     citations_text = ""
+
+     if needs_translation and final_answer:
+         logger.info(f"[GEMINI SUPERVISOR] Translating response back to {original_lang}...")
+         final_answer = translate_text(final_answer, target_lang=original_lang, source_lang="en")
+
+     if web_urls:
+         unique_urls = list(dict.fromkeys(web_urls))
+         citation_links = []
+         for url in unique_urls[:5]:
+             domain = format_url_as_domain(url)
+             if domain:
+                 citation_links.append(f"[{domain}]({url})")
+
+         if citation_links:
+             citations_text = "\n\n**Sources:** " + ", ".join(citation_links)
+
+     speaker_icon = ' 🔊'
+     final_answer_with_metadata = final_answer + citations_text + speaker_icon
+
+     updated_history[-1]["content"] = final_answer_with_metadata
+     thoughts_text = thought_handler.get_thoughts() if thought_handler else ""
+     yield updated_history, thoughts_text
+
+     if thought_handler:
+         logger.removeHandler(thought_handler)
+
+     diag_summary = {
+         "stage_metrics": pipeline_diagnostics["stage_metrics"],
+         "decisions": pipeline_diagnostics["strategy_decisions"],
+         "search": pipeline_diagnostics["search"],
+     }
+     try:
+         logger.info(f"[MAC] Diagnostics summary: {json.dumps(diag_summary)[:1200]}")
+     except Exception:
+         logger.info(f"[MAC] Diagnostics summary (non-serializable)")
+     logger.info(f"[MAC] Final answer generated: {len(final_answer)} chars, {len(breakdown.get('sub_topics', []))} tasks completed")
+
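stream_chat is a generator that repeatedly yields (chat_history, thoughts) so Gradio can stream partial answers. The real bindings live in ui.py, which is not rendered in this diff; the sketch below shows the expected wiring, with component names and default values as assumptions only:

    import gradio as gr
    from pipeline import stream_chat

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(type="messages")
        thoughts = gr.Textbox(label="Agent thoughts", lines=6)
        msg = gr.Textbox(label="Ask a medical question")
        system_prompt = gr.Textbox(value="", visible=False)
        temperature = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
        max_new_tokens = gr.Slider(64, 1024, value=512, step=64, label="Max new tokens")
        top_p = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
        top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
        penalty = gr.Slider(1.0, 2.0, value=1.1, label="Repetition penalty")
        retriever_k = gr.Slider(1, 30, value=15, step=1, label="Retriever top-k")
        merge_threshold = gr.Slider(0.1, 1.0, value=0.5, label="Merge threshold")
        use_rag = gr.Checkbox(value=True, label="Use document RAG")
        medical_model = gr.Dropdown(["MedSwin SFT", "MedSwin KD", "MedSwin TA"], value="MedSwin TA")
        use_web_search = gr.Checkbox(value=False, label="Use web search")
        disable_agentic = gr.Checkbox(value=False, label="Disable agentic reasoning")
        show_thoughts = gr.Checkbox(value=True, label="Show agent thoughts")

        # stream_chat's gr.Request parameter is injected by Gradio at call time
        msg.submit(
            stream_chat,
            inputs=[msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k,
                    penalty, retriever_k, merge_threshold, use_rag, medical_model,
                    use_web_search, disable_agentic, show_thoughts],
            outputs=[chatbot, thoughts],
        )

    demo.launch()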
reasoning.py ADDED
@@ -0,0 +1,178 @@
+ """Autonomous reasoning and execution planning"""
+ import json
+ import asyncio
+ from logger import logger
+ from mcp import MCP_AVAILABLE, call_agent
+ from config import GEMINI_MODEL
+
+ try:
+     import nest_asyncio
+ except ImportError:
+     nest_asyncio = None
+
+
+ async def autonomous_reasoning_gemini(query: str) -> dict:
+     """Autonomous reasoning using Gemini MCP"""
+     reasoning_prompt = f"""Analyze this medical query and provide structured reasoning:
+ Query: "{query}"
+ Analyze:
+ 1. Query Type: (diagnosis, treatment, drug_info, symptom_analysis, research, general_info)
+ 2. Complexity: (simple, moderate, complex, multi_faceted)
+ 3. Information Needs: What specific information is required?
+ 4. Requires RAG: (yes/no) - Does this need document context?
+ 5. Requires Web Search: (yes/no) - Does this need current/updated information?
+ 6. Sub-questions: Break down into key sub-questions if complex
+ Respond in JSON format:
+ {{
+ "query_type": "...",
+ "complexity": "...",
+ "information_needs": ["..."],
+ "requires_rag": true/false,
+ "requires_web_search": true/false,
+ "sub_questions": ["..."]
+ }}"""
+
+     system_prompt = "You are a medical reasoning system. Analyze queries systematically and provide structured JSON responses."
+
+     response = await call_agent(
+         user_prompt=reasoning_prompt,
+         system_prompt=system_prompt,
+         model=GEMINI_MODEL,
+         temperature=0.3
+     )
+
+     try:
+         json_start = response.find('{')
+         json_end = response.rfind('}') + 1
+         if json_start >= 0 and json_end > json_start:
+             reasoning = json.loads(response[json_start:json_end])
+         else:
+             raise ValueError("No JSON found")
+     except:
+         reasoning = {
+             "query_type": "general_info",
+             "complexity": "moderate",
+             "information_needs": ["medical information"],
+             "requires_rag": True,
+             "requires_web_search": False,
+             "sub_questions": [query]
+         }
+
+     logger.info(f"Reasoning analysis: {reasoning}")
+     return reasoning
+
+
+ def autonomous_reasoning(query: str, history: list) -> dict:
+     """Autonomous reasoning: Analyze query complexity, intent, and information needs"""
+     if not MCP_AVAILABLE:
+         logger.warning("⚠️ Gemini MCP not available for reasoning, using fallback")
+         return {
+             "query_type": "general_info",
+             "complexity": "moderate",
+             "information_needs": ["medical information"],
+             "requires_rag": True,
+             "requires_web_search": False,
+             "sub_questions": [query]
+         }
+
+     try:
+         loop = asyncio.get_event_loop()
+         if loop.is_running():
+             if nest_asyncio:
+                 # nest_asyncio exposes apply(), not run(); patch the loop, then re-enter it
+                 nest_asyncio.apply(loop)
+                 reasoning = loop.run_until_complete(autonomous_reasoning_gemini(query))
+                 return reasoning
+             else:
+                 logger.error("Error in nested async reasoning: nest_asyncio not available")
+         else:
+             reasoning = loop.run_until_complete(autonomous_reasoning_gemini(query))
+             return reasoning
+     except Exception as e:
+         logger.error(f"Gemini MCP reasoning error: {e}")
+
+     logger.warning("⚠️ Falling back to default reasoning")
+     return {
+         "query_type": "general_info",
+         "complexity": "moderate",
+         "information_needs": ["medical information"],
+         "requires_rag": True,
+         "requires_web_search": False,
+         "sub_questions": [query]
+     }
+
+
+ def create_execution_plan(reasoning: dict, query: str, has_rag_index: bool) -> dict:
+     """Planning: Create multi-step execution plan based on reasoning analysis"""
+     plan = {
+         "steps": [],
+         "strategy": "sequential",
+         "iterations": 1
+     }
+
+     if reasoning["complexity"] in ["complex", "multi_faceted"]:
+         plan["strategy"] = "iterative"
+         plan["iterations"] = 2
+
+     plan["steps"].append({
+         "step": 1,
+         "action": "detect_language",
+         "description": "Detect query language and translate if needed"
+     })
+
+     if reasoning.get("requires_rag", True) and has_rag_index:
+         plan["steps"].append({
+             "step": 2,
+             "action": "rag_retrieval",
+             "description": "Retrieve relevant document context",
+             "parameters": {"top_k": 15, "merge_threshold": 0.5}
+         })
+
+     if reasoning.get("requires_web_search", False):
+         plan["steps"].append({
+             "step": 3,
+             "action": "web_search",
+             "description": "Search web for current/updated information",
+             "parameters": {"max_results": 5}
+         })
+
+     if reasoning.get("sub_questions") and len(reasoning["sub_questions"]) > 1:
+         plan["steps"].append({
+             "step": 4,
+             "action": "multi_step_reasoning",
+             "description": "Process sub-questions iteratively",
+             "sub_questions": reasoning["sub_questions"]
+         })
+
+     plan["steps"].append({
+         "step": len(plan["steps"]) + 1,
+         "action": "synthesize_answer",
+         "description": "Generate comprehensive answer from all sources"
+     })
+
+     if reasoning["complexity"] in ["complex", "multi_faceted"]:
+         plan["steps"].append({
+             "step": len(plan["steps"]) + 1,
+             "action": "self_reflection",
+             "description": "Evaluate answer quality and completeness"
+         })
+
+     logger.info(f"Execution plan created: {len(plan['steps'])} steps")
+     return plan
+
+
+ def autonomous_execution_strategy(reasoning: dict, plan: dict, use_rag: bool, use_web_search: bool, has_rag_index: bool) -> dict:
+     """Autonomous execution: Make decisions on information gathering strategy"""
+     strategy = {
+         "use_rag": use_rag,
+         "use_web_search": use_web_search,
+         "reasoning_override": False,
+         "rationale": ""
+     }
+
+     if reasoning.get("requires_web_search", False) and not use_web_search:
+         strategy["rationale"] = "Reasoning suggests web search for current information, but the user kept it disabled."
+
+     if strategy["rationale"]:
+         logger.info(f"Autonomous reasoning note: {strategy['rationale']}")
+
+     return strategy
+
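A quick illustration of how the planner consumes the reasoning dict (the example query is an assumption; without MCP the fallback dict is returned):

    from reasoning import autonomous_reasoning, create_execution_plan

    query = "What are the current first-line treatments for type 2 diabetes?"
    reasoning = autonomous_reasoning(query, history=[])
    plan = create_execution_plan(reasoning, query, has_rag_index=False)
    # RAG is omitted from the plan because has_rag_index is False
    print(plan["strategy"], [step["action"] for step in plan["steps"]])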
search.py ADDED
@@ -0,0 +1,252 @@
+ """Web search functions"""
+ import json
+ import asyncio
+ import concurrent.futures
+ from logger import logger
+ from mcp import MCP_AVAILABLE, get_mcp_session, get_cached_mcp_tools, call_agent
+ from config import GEMINI_MODEL
+
+ try:
+     import nest_asyncio
+ except ImportError:
+     nest_asyncio = None
+
+
+ async def search_web_mcp_tool(query: str, max_results: int = 5) -> list:
+     """Search web using MCP web search tool (e.g., DuckDuckGo MCP server)"""
+     if not MCP_AVAILABLE:
+         return []
+
+     try:
+         tools = await get_cached_mcp_tools()
+         if not tools:
+             return []
+
+         search_tool = None
+         for tool in tools:
+             tool_name_lower = tool.name.lower()
+             if any(keyword in tool_name_lower for keyword in ["search", "duckduckgo", "ddg", "web"]):
+                 search_tool = tool
+                 logger.info(f"Found web search MCP tool: {tool.name}")
+                 break
+
+         if not search_tool:
+             tools = await get_cached_mcp_tools(force_refresh=True)
+             for tool in tools:
+                 tool_name_lower = tool.name.lower()
+                 if any(keyword in tool_name_lower for keyword in ["search", "duckduckgo", "ddg", "web"]):
+                     search_tool = tool
+                     logger.info(f"Found web search MCP tool after refresh: {tool.name}")
+                     break
+
+         if search_tool:
+             try:
+                 session = await get_mcp_session()
+                 if session is None:
+                     return []
+
+                 result = await session.call_tool(
+                     search_tool.name,
+                     arguments={"query": query, "max_results": max_results}
+                 )
+
+                 web_content = []
+                 if hasattr(result, 'content') and result.content:
+                     for item in result.content:
+                         if hasattr(item, 'text'):
+                             try:
+                                 data = json.loads(item.text)
+                                 if isinstance(data, list):
+                                     for entry in data[:max_results]:
+                                         web_content.append({
+                                             'title': entry.get('title', ''),
+                                             'url': entry.get('url', entry.get('href', '')),
+                                             'content': entry.get('body', entry.get('snippet', entry.get('content', '')))
+                                         })
+                                 elif isinstance(data, dict):
+                                     if 'results' in data:
+                                         for entry in data['results'][:max_results]:
+                                             web_content.append({
+                                                 'title': entry.get('title', ''),
+                                                 'url': entry.get('url', entry.get('href', '')),
+                                                 'content': entry.get('body', entry.get('snippet', entry.get('content', '')))
+                                             })
+                                     else:
+                                         web_content.append({
+                                             'title': data.get('title', ''),
+                                             'url': data.get('url', data.get('href', '')),
+                                             'content': data.get('body', data.get('snippet', data.get('content', '')))
+                                         })
+                             except json.JSONDecodeError:
+                                 web_content.append({
+                                     'title': '',
+                                     'url': '',
+                                     'content': item.text[:1000]
+                                 })
+
+                 if web_content:
+                     return web_content
+             except Exception as e:
+                 logger.error(f"Error calling web search MCP tool: {e}")
+
+         else:
+             logger.debug("No MCP web search tool discovered in current catalog")
+         return []
+     except Exception as e:
+         logger.error(f"Web search MCP tool error: {e}")
+         return []
+
+
+ async def search_web_mcp(query: str, max_results: int = 5) -> list:
+     """Search web using MCP tools - tries web search MCP tool first, then falls back to direct search"""
+     results = await search_web_mcp_tool(query, max_results)
+     if results:
+         logger.info(f"✅ Web search via MCP tool: found {len(results)} results")
+         return results
+
+     logger.info("ℹ️ [Direct API] No web search MCP tool found, using direct DuckDuckGo search (results will be summarized with Gemini MCP)")
+     return search_web_fallback(query, max_results)
+
+
+ def search_web_fallback(query: str, max_results: int = 5) -> list:
+     """Fallback web search using DuckDuckGo directly (when MCP is not available)"""
+     logger.info(f"🔍 [Direct API] Performing web search using DuckDuckGo API for: {query[:100]}...")
+     try:
+         from ddgs import DDGS
+         import requests
+         from bs4 import BeautifulSoup
+     except ImportError:
+         logger.error("Fallback dependencies (ddgs, requests, beautifulsoup4) not available")
+         return []
+
+     try:
+         with DDGS() as ddgs:
+             results = list(ddgs.text(query, max_results=max_results))
+         web_content = []
+         for result in results:
+             try:
+                 url = result.get('href', '')
+                 title = result.get('title', '')
+                 snippet = result.get('body', '')
+
+                 try:
+                     response = requests.get(url, timeout=5, headers={'User-Agent': 'Mozilla/5.0'})
+                     if response.status_code == 200:
+                         soup = BeautifulSoup(response.content, 'html.parser')
+                         for script in soup(["script", "style"]):
+                             script.decompose()
+                         text = soup.get_text()
+                         lines = (line.strip() for line in text.splitlines())
+                         chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+                         text = ' '.join(chunk for chunk in chunks if chunk)
+                         if len(text) > 1000:
+                             text = text[:1000] + "..."
+                         web_content.append({
+                             'title': title,
+                             'url': url,
+                             'content': snippet + "\n" + text[:500] if text else snippet
+                         })
+                     else:
+                         web_content.append({
+                             'title': title,
+                             'url': url,
+                             'content': snippet
+                         })
+                 except:
+                     web_content.append({
+                         'title': title,
+                         'url': url,
+                         'content': snippet
+                     })
+             except Exception as e:
+                 logger.error(f"Error processing search result: {e}")
+                 continue
+         logger.info(f"✅ [Direct API] Web search completed: {len(web_content)} results")
+         return web_content
+     except Exception as e:
+         logger.error(f"❌ [Direct API] Web search error: {e}")
+         return []
+
+
+ def search_web(query: str, max_results: int = 5) -> list:
+     """Search web using MCP tools (synchronous wrapper) - prioritizes MCP over direct ddgs"""
+     if MCP_AVAILABLE:
+         try:
+             try:
+                 loop = asyncio.get_event_loop()
+             except RuntimeError:
+                 loop = asyncio.new_event_loop()
+                 asyncio.set_event_loop(loop)
+
+             if loop.is_running():
+                 if nest_asyncio:
+                     # nest_asyncio exposes apply(), not run(); patch the loop, then re-enter it
+                     nest_asyncio.apply(loop)
+                     results = loop.run_until_complete(search_web_mcp(query, max_results))
+                     if results:
+                         return results
+                 else:
+                     with concurrent.futures.ThreadPoolExecutor() as executor:
+                         future = executor.submit(asyncio.run, search_web_mcp(query, max_results))
+                         results = future.result(timeout=30)
+                         if results:
+                             return results
+             else:
+                 results = loop.run_until_complete(search_web_mcp(query, max_results))
+                 if results:
+                     return results
+         except Exception as e:
+             logger.error(f"Error running async MCP search: {e}")
+
+     logger.info("ℹ️ [Direct API] Falling back to direct DuckDuckGo search (MCP unavailable or returned no results)")
+     return search_web_fallback(query, max_results)
+
+
+ async def summarize_web_content_gemini(content_list: list, query: str) -> str:
+     """Summarize web search results using Gemini MCP"""
+     combined_content = "\n\n".join([f"Source: {item['title']}\n{item['content']}" for item in content_list[:3]])
+
+     user_prompt = f"""Summarize the following web search results related to the query: "{query}"
+ Extract key medical information, facts, and insights. Be concise and focus on reliable information.
+ Search Results:
+ {combined_content}
+ Summary:"""
+
+     system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately."
+
+     result = await call_agent(
+         user_prompt=user_prompt,
+         system_prompt=system_prompt,
+         model=GEMINI_MODEL,
+         temperature=0.5
+     )
+
+     return result.strip()
+
+
+ def summarize_web_content(content_list: list, query: str) -> str:
+     """Summarize web search results using Gemini MCP"""
227
+ if not MCP_AVAILABLE:
228
+ logger.warning("Gemini MCP not available for summarization")
229
+ if content_list:
230
+ return content_list[0].get('content', '')[:500]
231
+ return ""
232
+
233
+ try:
234
+ loop = asyncio.get_event_loop()
235
+ if loop.is_running():
236
+ if nest_asyncio:
237
+ summary = nest_asyncio.run(summarize_web_content_gemini(content_list, query))
238
+ if summary:
239
+ return summary
240
+ else:
241
+ logger.error("Error in nested async summarization: nest_asyncio not available")
242
+ else:
243
+ summary = loop.run_until_complete(summarize_web_content_gemini(content_list, query))
244
+ if summary:
245
+ return summary
246
+ except Exception as e:
247
+ logger.error(f"Gemini MCP summarization error: {e}")
248
+
249
+ if content_list:
250
+ return content_list[0].get('content', '')[:500]
251
+ return ""
252
+
supervisor.py ADDED
@@ -0,0 +1,717 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gemini Supervisor functions for MAC architecture"""
2
+ import json
3
+ import asyncio
4
+ import torch
5
+ import spaces
6
+ from logger import logger
7
+ from mcp import MCP_AVAILABLE, call_agent
8
+ from config import GEMINI_MODEL, GEMINI_MODEL_LITE
9
+ from utils import format_prompt_manually
10
+
11
+ try:
12
+ import nest_asyncio
13
+ except ImportError:
14
+ nest_asyncio = None
15
+
16
+
17
+ async def gemini_supervisor_breakdown_async(query: str, use_rag: bool, use_web_search: bool, time_elapsed: float, max_duration: int = 120) -> dict:
18
+ """Gemini Supervisor: Break user query into sub-topics"""
19
+ remaining_time = max(15, max_duration - time_elapsed)
20
+
21
+ mode_description = []
22
+ if use_rag:
23
+ mode_description.append("RAG mode enabled - will use retrieved documents")
24
+ if use_web_search:
25
+ mode_description.append("Web search mode enabled - will search online sources")
26
+ if not mode_description:
27
+ mode_description.append("Direct answer mode - no additional context")
28
+
29
+ estimated_time_per_task = 8
30
+ max_topics_by_time = max(2, int((remaining_time - 20) / estimated_time_per_task))
31
+ max_topics = min(max_topics_by_time, 10)
32
+
33
+ prompt = f"""You are a supervisor agent coordinating with a MedSwin medical specialist model.
34
+ Break the following medical query into focused sub-topics that MedSwin can answer sequentially.
35
+ Explore different potential approaches to comprehensively address the topic.
36
+
37
+ Query: "{query}"
38
+ Mode: {', '.join(mode_description)}
39
+ Time Remaining: ~{remaining_time:.1f}s
40
+ Maximum Topics: {max_topics} (adjust based on complexity - use as many as needed for thorough coverage)
41
+
42
+ Return ONLY valid JSON (no markdown, no tables, no explanations):
43
+ {{
44
+ "sub_topics": [
45
+ {{
46
+ "id": 1,
47
+ "topic": "concise topic name",
48
+ "instruction": "specific directive for MedSwin to answer this topic",
49
+ "expected_tokens": 200,
50
+ "priority": "high|medium|low",
51
+ "approach": "brief description of approach/angle for this topic"
52
+ }},
53
+ ...
54
+ ],
55
+ "strategy": "brief strategy description explaining the breakdown approach",
56
+ "exploration_note": "brief note on different approaches explored"
57
+ }}
58
+
59
+ Guidelines:
60
+ - Break down the query into as many subtasks as needed for comprehensive coverage
61
+ - Explore different angles/approaches (e.g., clinical, diagnostic, treatment, prevention, research perspectives)
62
+ - Each topic should be focused and answerable in ~200 tokens by MedSwin
63
+ - Prioritize topics by importance (high priority first)
64
+ - Don't limit yourself to 4 topics - use more if the query is complex or multi-faceted"""
65
+
66
+ system_prompt = "You are a medical query supervisor. Break queries into structured JSON sub-topics, exploring different approaches. Return ONLY valid JSON."
67
+
68
+ response = await call_agent(
69
+ user_prompt=prompt,
70
+ system_prompt=system_prompt,
71
+ model=GEMINI_MODEL,
72
+ temperature=0.3
73
+ )
74
+
75
+ try:
76
+ json_start = response.find('{')
77
+ json_end = response.rfind('}') + 1
78
+ if json_start >= 0 and json_end > json_start:
79
+ breakdown = json.loads(response[json_start:json_end])
80
+ logger.info(f"[GEMINI SUPERVISOR] Query broken into {len(breakdown.get('sub_topics', []))} sub-topics")
81
+ return breakdown
82
+ else:
83
+ raise ValueError("Supervisor JSON not found")
84
+ except Exception as exc:
85
+ logger.error(f"[GEMINI SUPERVISOR] Breakdown parsing failed: {exc}")
86
+ breakdown = {
87
+ "sub_topics": [
88
+ {"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},
89
+ {"id": 2, "topic": "Clinical Details", "instruction": "Provide key clinical insights", "expected_tokens": 200, "priority": "medium", "approach": "clinical perspective"},
90
+ ],
91
+ "strategy": "Sequential answer with key points",
92
+ "exploration_note": "Fallback breakdown - basic coverage"
93
+ }
94
+ logger.warning(f"[GEMINI SUPERVISOR] Using fallback breakdown")
95
+ return breakdown
96
+
97
+
98
+ async def gemini_supervisor_search_strategies_async(query: str, time_elapsed: float) -> dict:
99
+ """Gemini Supervisor: In search mode, break query into 1-4 searching strategies"""
100
+ prompt = f"""You are supervising web search for a medical query.
101
+ Break this query into 1-4 focused search strategies (each targeting 1-2 sources).
102
+
103
+ Query: "{query}"
104
+
105
+ Return ONLY valid JSON:
106
+ {{
107
+ "search_strategies": [
108
+ {{
109
+ "id": 1,
110
+ "strategy": "search query string",
111
+ "target_sources": 1,
112
+ "focus": "what to search for"
113
+ }},
114
+ ...
115
+ ],
116
+ "max_strategies": 4
117
+ }}
118
+
119
+ Keep strategies focused and avoid overlap."""
120
+
121
+ system_prompt = "You are a search strategy supervisor. Create focused search queries. Return ONLY valid JSON."
122
+
123
+ response = await call_agent(
124
+ user_prompt=prompt,
125
+ system_prompt=system_prompt,
126
+ model=GEMINI_MODEL_LITE,
127
+ temperature=0.2
128
+ )
129
+
130
+ try:
131
+ json_start = response.find('{')
132
+ json_end = response.rfind('}') + 1
133
+ if json_start >= 0 and json_end > json_start:
134
+ strategies = json.loads(response[json_start:json_end])
135
+ logger.info(f"[GEMINI SUPERVISOR] Created {len(strategies.get('search_strategies', []))} search strategies")
136
+ return strategies
137
+ else:
138
+ raise ValueError("Search strategies JSON not found")
139
+ except Exception as exc:
140
+ logger.error(f"[GEMINI SUPERVISOR] Search strategies parsing failed: {exc}")
141
+ return {
142
+ "search_strategies": [
143
+ {"id": 1, "strategy": query, "target_sources": 2, "focus": "main query"}
144
+ ],
145
+ "max_strategies": 1
146
+ }
147
+
148
+
149
+ async def gemini_supervisor_rag_brainstorm_async(query: str, retrieved_docs: str, time_elapsed: float) -> dict:
150
+ """Gemini Supervisor: In RAG mode, brainstorm retrieved documents into 1-4 short contexts"""
151
+ max_doc_length = 3000
152
+ if len(retrieved_docs) > max_doc_length:
153
+ retrieved_docs = retrieved_docs[:max_doc_length] + "..."
154
+
155
+ prompt = f"""You are supervising RAG context preparation for a medical query.
156
+ Brainstorm the retrieved documents into 1-4 concise, focused contexts that MedSwin can use.
157
+
158
+ Query: "{query}"
159
+ Retrieved Documents:
160
+ {retrieved_docs}
161
+
162
+ Return ONLY valid JSON:
163
+ {{
164
+ "contexts": [
165
+ {{
166
+ "id": 1,
167
+ "context": "concise summary of relevant information (keep under 500 chars)",
168
+ "focus": "what this context covers",
169
+ "relevance": "high|medium|low"
170
+ }},
171
+ ...
172
+ ],
173
+ "max_contexts": 4
174
+ }}
175
+
176
+ Keep contexts brief and factual. Avoid redundancy."""
177
+
178
+ system_prompt = "You are a RAG context supervisor. Summarize documents into concise contexts. Return ONLY valid JSON."
179
+
180
+ response = await call_agent(
181
+ user_prompt=prompt,
182
+ system_prompt=system_prompt,
183
+ model=GEMINI_MODEL_LITE,
184
+ temperature=0.2
185
+ )
186
+
187
+ try:
188
+ json_start = response.find('{')
189
+ json_end = response.rfind('}') + 1
190
+ if json_start >= 0 and json_end > json_start:
191
+ contexts = json.loads(response[json_start:json_end])
192
+ logger.info(f"[GEMINI SUPERVISOR] Brainstormed {len(contexts.get('contexts', []))} RAG contexts")
193
+ return contexts
194
+ else:
195
+ raise ValueError("RAG contexts JSON not found")
196
+ except Exception as exc:
197
+ logger.error(f"[GEMINI SUPERVISOR] RAG brainstorming parsing failed: {exc}")
198
+ return {
199
+ "contexts": [
200
+ {"id": 1, "context": retrieved_docs[:500], "focus": "retrieved information", "relevance": "high"}
201
+ ],
202
+ "max_contexts": 1
203
+ }
204
+
205
+
206
+ def gemini_supervisor_breakdown(query: str, use_rag: bool, use_web_search: bool, time_elapsed: float, max_duration: int = 120) -> dict:
207
+ """Wrapper to obtain supervisor breakdown synchronously"""
208
+ if not MCP_AVAILABLE:
209
+ logger.warning("[GEMINI SUPERVISOR] MCP unavailable, using fallback breakdown")
210
+ return {
211
+ "sub_topics": [
212
+ {"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},
213
+ {"id": 2, "topic": "Clinical Details", "instruction": "Provide key clinical insights", "expected_tokens": 200, "priority": "medium", "approach": "clinical perspective"},
214
+ ],
215
+ "strategy": "Sequential answer with key points",
216
+ "exploration_note": "Fallback breakdown - basic coverage"
217
+ }
218
+
219
+ try:
220
+ loop = asyncio.get_event_loop()
221
+ if loop.is_running():
222
+ if nest_asyncio:
223
+ return nest_asyncio.run(
224
+ gemini_supervisor_breakdown_async(query, use_rag, use_web_search, time_elapsed, max_duration)
225
+ )
226
+ else:
227
+ logger.error("[GEMINI SUPERVISOR] Nested breakdown execution failed: nest_asyncio not available")
228
+ return loop.run_until_complete(
229
+ gemini_supervisor_breakdown_async(query, use_rag, use_web_search, time_elapsed, max_duration)
230
+ )
231
+ except Exception as exc:
232
+ logger.error(f"[GEMINI SUPERVISOR] Breakdown request failed: {exc}")
233
+ return {
234
+ "sub_topics": [
235
+ {"id": 1, "topic": "Core Question", "instruction": "Address the main medical question", "expected_tokens": 200, "priority": "high", "approach": "direct answer"},
236
+ ],
237
+ "strategy": "Direct answer",
238
+ "exploration_note": "Fallback breakdown - single topic"
239
+ }
240
+
241
+
242
+ def gemini_supervisor_search_strategies(query: str, time_elapsed: float) -> dict:
243
+ """Wrapper to obtain search strategies synchronously"""
244
+ if not MCP_AVAILABLE:
245
+ logger.warning("[GEMINI SUPERVISOR] MCP unavailable for search strategies")
246
+ return {
247
+ "search_strategies": [
248
+ {"id": 1, "strategy": query, "target_sources": 2, "focus": "main query"}
249
+ ],
250
+ "max_strategies": 1
251
+ }
252
+
253
+ try:
254
+ loop = asyncio.get_event_loop()
255
+ if loop.is_running():
256
+ if nest_asyncio:
257
+ return nest_asyncio.run(gemini_supervisor_search_strategies_async(query, time_elapsed))
258
+ else:
259
+ logger.error("[GEMINI SUPERVISOR] Nested search strategies execution failed: nest_asyncio not available")
260
+ return loop.run_until_complete(gemini_supervisor_search_strategies_async(query, time_elapsed))
261
+ except Exception as exc:
262
+ logger.error(f"[GEMINI SUPERVISOR] Search strategies request failed: {exc}")
263
+ return {
264
+ "search_strategies": [
265
+ {"id": 1, "strategy": query, "target_sources": 2, "focus": "main query"}
266
+ ],
267
+ "max_strategies": 1
268
+ }
269
+
270
+
271
+ def gemini_supervisor_rag_brainstorm(query: str, retrieved_docs: str, time_elapsed: float) -> dict:
272
+ """Wrapper to obtain RAG brainstorm synchronously"""
273
+ if not MCP_AVAILABLE:
274
+ logger.warning("[GEMINI SUPERVISOR] MCP unavailable for RAG brainstorm")
275
+ return {
276
+ "contexts": [
277
+ {"id": 1, "context": retrieved_docs[:500], "focus": "retrieved information", "relevance": "high"}
278
+ ],
279
+ "max_contexts": 1
280
+ }
281
+
282
+ try:
283
+ loop = asyncio.get_event_loop()
284
+ if loop.is_running():
285
+ if nest_asyncio:
286
+ return nest_asyncio.run(gemini_supervisor_rag_brainstorm_async(query, retrieved_docs, time_elapsed))
287
+ else:
288
+ logger.error("[GEMINI SUPERVISOR] Nested RAG brainstorm execution failed: nest_asyncio not available")
289
+ return loop.run_until_complete(gemini_supervisor_rag_brainstorm_async(query, retrieved_docs, time_elapsed))
290
+ except Exception as exc:
291
+ logger.error(f"[GEMINI SUPERVISOR] RAG brainstorm request failed: {exc}")
292
+ return {
293
+ "contexts": [
294
+ {"id": 1, "context": retrieved_docs[:500], "focus": "retrieved information", "relevance": "high"}
295
+ ],
296
+ "max_contexts": 1
297
+ }
298
+
299
+
300
+ @spaces.GPU(max_duration=120)
301
+ def execute_medswin_task(
302
+ medical_model_obj,
303
+ medical_tokenizer,
304
+ task_instruction: str,
305
+ context: str,
306
+ system_prompt_base: str,
307
+ temperature: float,
308
+ max_new_tokens: int,
309
+ top_p: float,
310
+ top_k: int,
311
+ penalty: float
312
+ ) -> str:
313
+ """MedSwin Specialist: Execute a single task assigned by Gemini Supervisor"""
314
+ if context:
315
+ full_prompt = f"{system_prompt_base}\n\nContext:\n{context}\n\nTask: {task_instruction}\n\nAnswer concisely with key bullet points (Markdown format, no tables):"
316
+ else:
317
+ full_prompt = f"{system_prompt_base}\n\nTask: {task_instruction}\n\nAnswer concisely with key bullet points (Markdown format, no tables):"
318
+
319
+ messages = [{"role": "system", "content": full_prompt}]
320
+
321
+ if hasattr(medical_tokenizer, 'chat_template') and medical_tokenizer.chat_template is not None:
322
+ try:
323
+ prompt = medical_tokenizer.apply_chat_template(
324
+ messages,
325
+ tokenize=False,
326
+ add_generation_prompt=True
327
+ )
328
+ except Exception as e:
329
+ logger.warning(f"[MEDSWIN] Chat template failed, using manual formatting: {e}")
330
+ prompt = format_prompt_manually(messages, medical_tokenizer)
331
+ else:
332
+ prompt = format_prompt_manually(messages, medical_tokenizer)
333
+
334
+ inputs = medical_tokenizer(prompt, return_tensors="pt").to(medical_model_obj.device)
335
+
336
+ eos_token_id = medical_tokenizer.eos_token_id or medical_tokenizer.pad_token_id
337
+
338
+ with torch.no_grad():
339
+ outputs = medical_model_obj.generate(
340
+ **inputs,
341
+ max_new_tokens=min(max_new_tokens, 800),
342
+ temperature=temperature,
343
+ top_p=top_p,
344
+ top_k=top_k,
345
+ repetition_penalty=penalty,
346
+ do_sample=True,
347
+ eos_token_id=eos_token_id,
348
+ pad_token_id=medical_tokenizer.pad_token_id or eos_token_id
349
+ )
350
+
351
+ response = medical_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
352
+
353
+ response = response.strip()
354
+ if "|" in response and "---" in response:
355
+ logger.warning("[MEDSWIN] Detected table format, converting to Markdown bullets")
356
+ lines = [line.strip() for line in response.split('\n') if line.strip() and not line.strip().startswith('|') and '---' not in line]
357
+ response = '\n'.join([f"- {line}" if not line.startswith('-') else line for line in lines])
358
+
359
+ logger.info(f"[MEDSWIN] Task completed: {len(response)} chars generated")
360
+ return response
361
+
362
+
363
+ async def gemini_supervisor_synthesize_async(query: str, medswin_answers: list, rag_contexts: list, search_contexts: list, breakdown: dict) -> str:
364
+ """Gemini Supervisor: Synthesize final answer from all MedSwin responses"""
365
+ context_summary = ""
366
+ if rag_contexts:
367
+ context_summary += f"Document Context Available: {len(rag_contexts)} context(s) from uploaded documents.\n"
368
+ if search_contexts:
369
+ context_summary += f"Web Search Context Available: {len(search_contexts)} search result(s).\n"
370
+
371
+ all_answers_text = "\n\n---\n\n".join([f"## {i+1}. {ans}" for i, ans in enumerate(medswin_answers)])
372
+
373
+ prompt = f"""You are a supervisor agent synthesizing a comprehensive medical answer from multiple specialist responses.
374
+
375
+ Original Query: "{query}"
376
+
377
+ Context Available:
378
+ {context_summary}
379
+
380
+ MedSwin Specialist Responses (from {len(medswin_answers)} sub-topics):
381
+ {all_answers_text}
382
+
383
+ Your task:
384
+ 1. Synthesize all responses into a coherent, comprehensive final answer
385
+ 2. Integrate information from all sub-topics seamlessly
386
+ 3. Ensure the answer directly addresses the original query
387
+ 4. Maintain clinical accuracy and clarity
388
+ 5. Use clear structure with appropriate headings and bullet points
389
+ 6. Remove redundancy and contradictions
390
+ 7. Ensure all important points from MedSwin responses are included
391
+
392
+ Return the final synthesized answer in Markdown format. Do not add meta-commentary or explanations - just provide the final answer."""
393
+
394
+ system_prompt = "You are a medical answer synthesis supervisor. Create comprehensive, well-structured final answers from multiple specialist responses."
395
+
396
+ result = await call_agent(
397
+ user_prompt=prompt,
398
+ system_prompt=system_prompt,
399
+ model=GEMINI_MODEL,
400
+ temperature=0.3
401
+ )
402
+
403
+ return result.strip()
404
+
405
+
406
+ async def gemini_supervisor_challenge_async(query: str, current_answer: str, medswin_answers: list, rag_contexts: list, search_contexts: list) -> dict:
407
+ """Gemini Supervisor: Challenge and evaluate the current answer"""
408
+ context_info = ""
409
+ if rag_contexts:
410
+ context_info += f"Document contexts: {len(rag_contexts)} available.\n"
411
+ if search_contexts:
412
+ context_info += f"Search contexts: {len(search_contexts)} available.\n"
413
+
414
+ all_answers_text = "\n\n---\n\n".join([f"## {i+1}. {ans}" for i, ans in enumerate(medswin_answers)])
415
+
416
+ prompt = f"""You are a supervisor agent evaluating and challenging a medical answer for quality and completeness.
417
+
418
+ Original Query: "{query}"
419
+
420
+ Available Context:
421
+ {context_info}
422
+
423
+ MedSwin Specialist Responses:
424
+ {all_answers_text}
425
+
426
+ Current Synthesized Answer:
427
+ {current_answer[:2000]}
428
+
429
+ Evaluate this answer and provide:
430
+ 1. Completeness: Does it fully address the query? What's missing?
431
+ 2. Accuracy: Are there any inaccuracies or contradictions?
432
+ 3. Clarity: Is it well-structured and clear?
433
+ 4. Context Usage: Are document/search contexts properly utilized?
434
+ 5. Improvement Suggestions: Specific ways to enhance the answer
435
+
436
+ Return ONLY valid JSON:
437
+ {{
438
+ "is_optimal": true/false,
439
+ "completeness_score": 0-10,
440
+ "accuracy_score": 0-10,
441
+ "clarity_score": 0-10,
442
+ "missing_aspects": ["..."],
443
+ "inaccuracies": ["..."],
444
+ "improvement_suggestions": ["..."],
445
+ "needs_more_context": true/false,
446
+ "enhancement_instructions": "specific instructions for improving the answer"
447
+ }}"""
448
+
449
+ system_prompt = "You are a medical answer quality evaluator. Provide honest, constructive feedback in JSON format. Return ONLY valid JSON."
450
+
451
+ response = await call_agent(
452
+ user_prompt=prompt,
453
+ system_prompt=system_prompt,
454
+ model=GEMINI_MODEL,
455
+ temperature=0.3
456
+ )
457
+
458
+ try:
459
+ json_start = response.find('{')
460
+ json_end = response.rfind('}') + 1
461
+ if json_start >= 0 and json_end > json_start:
462
+ evaluation = json.loads(response[json_start:json_end])
463
+ logger.info(f"[GEMINI SUPERVISOR] Challenge evaluation: optimal={evaluation.get('is_optimal', False)}, scores={evaluation.get('completeness_score', 'N/A')}/{evaluation.get('accuracy_score', 'N/A')}/{evaluation.get('clarity_score', 'N/A')}")
464
+ return evaluation
465
+ else:
466
+ raise ValueError("Evaluation JSON not found")
467
+ except Exception as exc:
468
+ logger.error(f"[GEMINI SUPERVISOR] Challenge evaluation parsing failed: {exc}")
469
+ return {
470
+ "is_optimal": True,
471
+ "completeness_score": 7,
472
+ "accuracy_score": 7,
473
+ "clarity_score": 7,
474
+ "missing_aspects": [],
475
+ "inaccuracies": [],
476
+ "improvement_suggestions": [],
477
+ "needs_more_context": False,
478
+ "enhancement_instructions": ""
479
+ }
480
+
481
+
482
+ async def gemini_supervisor_enhance_answer_async(query: str, current_answer: str, enhancement_instructions: str, medswin_answers: list, rag_contexts: list, search_contexts: list) -> str:
483
+ """Gemini Supervisor: Enhance the answer based on challenge feedback"""
484
+ context_info = ""
485
+ if rag_contexts:
486
+ context_info += f"Document contexts: {len(rag_contexts)} available.\n"
487
+ if search_contexts:
488
+ context_info += f"Search contexts: {len(search_contexts)} available.\n"
489
+
490
+ all_answers_text = "\n\n---\n\n".join([f"## {i+1}. {ans}" for i, ans in enumerate(medswin_answers)])
491
+
492
+ prompt = f"""You are a supervisor agent enhancing a medical answer based on evaluation feedback.
493
+
494
+ Original Query: "{query}"
495
+
496
+ Available Context:
497
+ {context_info}
498
+
499
+ MedSwin Specialist Responses:
500
+ {all_answers_text}
501
+
502
+ Current Answer (to enhance):
503
+ {current_answer}
504
+
505
+ Enhancement Instructions:
506
+ {enhancement_instructions}
507
+
508
+ Create an enhanced version of the answer that:
509
+ 1. Addresses all improvement suggestions
510
+ 2. Fills in missing aspects
511
+ 3. Corrects any inaccuracies
512
+ 4. Improves clarity and structure
513
+ 5. Better utilizes available context
514
+ 6. Maintains all valuable information from the current answer
515
+
516
+ Return the enhanced answer in Markdown format. Do not add meta-commentary."""
517
+
518
+ system_prompt = "You are a medical answer enhancement supervisor. Improve answers based on evaluation feedback while maintaining accuracy."
519
+
520
+ result = await call_agent(
521
+ user_prompt=prompt,
522
+ system_prompt=system_prompt,
523
+ model=GEMINI_MODEL,
524
+ temperature=0.3
525
+ )
526
+
527
+ return result.strip()
528
+
529
+
530
+ async def gemini_supervisor_check_clarity_async(query: str, answer: str, use_web_search: bool) -> dict:
531
+ """Gemini Supervisor: Check if answer is unclear or supervisor is unsure"""
532
+ if not use_web_search:
533
+ return {"is_unclear": False, "needs_search": False, "search_queries": []}
534
+
535
+ prompt = f"""You are a supervisor agent evaluating answer clarity and completeness.
536
+
537
+ Query: "{query}"
538
+
539
+ Current Answer:
540
+ {answer[:1500]}
541
+
542
+ Evaluate:
543
+ 1. Is the answer unclear or incomplete?
544
+ 2. Are there gaps that web search could fill?
545
+ 3. Is the supervisor (you) unsure about certain aspects?
546
+
547
+ Return ONLY valid JSON:
548
+ {{
549
+ "is_unclear": true/false,
550
+ "needs_search": true/false,
551
+ "uncertainty_areas": ["..."],
552
+ "search_queries": ["specific search queries to fill gaps"],
553
+ "rationale": "brief explanation"
554
+ }}
555
+
556
+ Only suggest search if the answer is genuinely unclear or has significant gaps that search could address."""
557
+
558
+ system_prompt = "You are a clarity evaluator. Assess if additional web search is needed. Return ONLY valid JSON."
559
+
560
+ response = await call_agent(
561
+ user_prompt=prompt,
562
+ system_prompt=system_prompt,
563
+ model=GEMINI_MODEL_LITE,
564
+ temperature=0.2
565
+ )
566
+
567
+ try:
568
+ json_start = response.find('{')
569
+ json_end = response.rfind('}') + 1
570
+ if json_start >= 0 and json_end > json_start:
571
+ evaluation = json.loads(response[json_start:json_end])
572
+ logger.info(f"[GEMINI SUPERVISOR] Clarity check: unclear={evaluation.get('is_unclear', False)}, needs_search={evaluation.get('needs_search', False)}")
573
+ return evaluation
574
+ else:
575
+ raise ValueError("Clarity check JSON not found")
576
+ except Exception as exc:
577
+ logger.error(f"[GEMINI SUPERVISOR] Clarity check parsing failed: {exc}")
578
+ return {"is_unclear": False, "needs_search": False, "search_queries": []}
579
+
580
+
581
+ def gemini_supervisor_synthesize(query: str, medswin_answers: list, rag_contexts: list, search_contexts: list, breakdown: dict) -> str:
582
+ """Wrapper to synthesize answer synchronously"""
583
+ if not MCP_AVAILABLE:
584
+ logger.warning("[GEMINI SUPERVISOR] MCP unavailable for synthesis, using simple concatenation")
585
+ return "\n\n".join(medswin_answers)
586
+
587
+ try:
588
+ loop = asyncio.get_event_loop()
589
+ if loop.is_running():
590
+ if nest_asyncio:
591
+ return nest_asyncio.run(gemini_supervisor_synthesize_async(query, medswin_answers, rag_contexts, search_contexts, breakdown))
592
+ else:
593
+ logger.error("[GEMINI SUPERVISOR] Nested synthesis failed: nest_asyncio not available")
594
+ return loop.run_until_complete(gemini_supervisor_synthesize_async(query, medswin_answers, rag_contexts, search_contexts, breakdown))
595
+ except Exception as exc:
596
+ logger.error(f"[GEMINI SUPERVISOR] Synthesis failed: {exc}")
597
+ return "\n\n".join(medswin_answers)
598
+
599
+
600
+ def gemini_supervisor_challenge(query: str, current_answer: str, medswin_answers: list, rag_contexts: list, search_contexts: list) -> dict:
601
+ """Wrapper to challenge answer synchronously"""
602
+ if not MCP_AVAILABLE:
603
+ return {"is_optimal": True, "completeness_score": 7, "accuracy_score": 7, "clarity_score": 7, "missing_aspects": [], "inaccuracies": [], "improvement_suggestions": [], "needs_more_context": False, "enhancement_instructions": ""}
604
+
605
+ try:
606
+ loop = asyncio.get_event_loop()
607
+ if loop.is_running():
608
+ if nest_asyncio:
609
+ return nest_asyncio.run(gemini_supervisor_challenge_async(query, current_answer, medswin_answers, rag_contexts, search_contexts))
610
+ else:
611
+ logger.error("[GEMINI SUPERVISOR] Nested challenge failed: nest_asyncio not available")
612
+ return loop.run_until_complete(gemini_supervisor_challenge_async(query, current_answer, medswin_answers, rag_contexts, search_contexts))
613
+ except Exception as exc:
614
+ logger.error(f"[GEMINI SUPERVISOR] Challenge failed: {exc}")
615
+ return {"is_optimal": True, "completeness_score": 7, "accuracy_score": 7, "clarity_score": 7, "missing_aspects": [], "inaccuracies": [], "improvement_suggestions": [], "needs_more_context": False, "enhancement_instructions": ""}
616
+
617
+
618
+ def gemini_supervisor_enhance_answer(query: str, current_answer: str, enhancement_instructions: str, medswin_answers: list, rag_contexts: list, search_contexts: list) -> str:
619
+ """Wrapper to enhance answer synchronously"""
620
+ if not MCP_AVAILABLE:
621
+ return current_answer
622
+
623
+ try:
624
+ loop = asyncio.get_event_loop()
625
+ if loop.is_running():
626
+ if nest_asyncio:
627
+ return nest_asyncio.run(gemini_supervisor_enhance_answer_async(query, current_answer, enhancement_instructions, medswin_answers, rag_contexts, search_contexts))
628
+ else:
629
+ logger.error("[GEMINI SUPERVISOR] Nested enhancement failed: nest_asyncio not available")
630
+ return loop.run_until_complete(gemini_supervisor_enhance_answer_async(query, current_answer, enhancement_instructions, medswin_answers, rag_contexts, search_contexts))
631
+ except Exception as exc:
632
+ logger.error(f"[GEMINI SUPERVISOR] Enhancement failed: {exc}")
633
+ return current_answer
634
+
635
+
636
+ def gemini_supervisor_check_clarity(query: str, answer: str, use_web_search: bool) -> dict:
637
+ """Wrapper to check clarity synchronously"""
638
+ if not MCP_AVAILABLE or not use_web_search:
639
+ return {"is_unclear": False, "needs_search": False, "search_queries": []}
640
+
641
+ try:
642
+ loop = asyncio.get_event_loop()
643
+ if loop.is_running():
644
+ if nest_asyncio:
645
+ return nest_asyncio.run(gemini_supervisor_check_clarity_async(query, answer, use_web_search))
646
+ else:
647
+ logger.error("[GEMINI SUPERVISOR] Nested clarity check failed: nest_asyncio not available")
648
+ return loop.run_until_complete(gemini_supervisor_check_clarity_async(query, answer, use_web_search))
649
+ except Exception as exc:
650
+ logger.error(f"[GEMINI SUPERVISOR] Clarity check failed: {exc}")
651
+ return {"is_unclear": False, "needs_search": False, "search_queries": []}
652
+
653
+
654
+ async def self_reflection_gemini(answer: str, query: str) -> dict:
655
+ """Self-reflection using Gemini MCP"""
656
+ reflection_prompt = f"""Evaluate this medical answer for quality and completeness:
657
+ Query: "{query}"
658
+ Answer: "{answer[:1000]}"
659
+ Evaluate:
660
+ 1. Completeness: Does it address all aspects of the query?
661
+ 2. Accuracy: Is the medical information accurate?
662
+ 3. Clarity: Is it clear and well-structured?
663
+ 4. Sources: Are sources cited appropriately?
664
+ 5. Missing Information: What important information might be missing?
665
+ Respond in JSON:
666
+ {{
667
+ "completeness_score": 0-10,
668
+ "accuracy_score": 0-10,
669
+ "clarity_score": 0-10,
670
+ "overall_score": 0-10,
671
+ "missing_aspects": ["..."],
672
+ "improvement_suggestions": ["..."]
673
+ }}"""
674
+
675
+ system_prompt = "You are a medical answer quality evaluator. Provide honest, constructive feedback."
676
+
677
+ response = await call_agent(
678
+ user_prompt=reflection_prompt,
679
+ system_prompt=system_prompt,
680
+ model=GEMINI_MODEL,
681
+ temperature=0.3
682
+ )
683
+
684
+ try:
685
+ json_start = response.find('{')
686
+ json_end = response.rfind('}') + 1
687
+ if json_start >= 0 and json_end > json_start:
688
+ reflection = json.loads(response[json_start:json_end])
689
+ else:
690
+ reflection = {"overall_score": 7, "improvement_suggestions": []}
691
+ except:
692
+ reflection = {"overall_score": 7, "improvement_suggestions": []}
693
+
694
+ logger.info(f"Self-reflection score: {reflection.get('overall_score', 'N/A')}")
695
+ return reflection
696
+
697
+
698
+ def self_reflection(answer: str, query: str, reasoning: dict) -> dict:
699
+ """Self-reflection: Evaluate answer quality and completeness"""
700
+ if not MCP_AVAILABLE:
701
+ logger.warning("Gemini MCP not available for reflection, using fallback")
702
+ return {"overall_score": 7, "improvement_suggestions": []}
703
+
704
+ try:
705
+ loop = asyncio.get_event_loop()
706
+ if loop.is_running():
707
+ if nest_asyncio:
708
+ return nest_asyncio.run(self_reflection_gemini(answer, query))
709
+ else:
710
+ logger.error("Error in nested async reflection: nest_asyncio not available")
711
+ else:
712
+ return loop.run_until_complete(self_reflection_gemini(answer, query))
713
+ except Exception as e:
714
+ logger.error(f"Gemini MCP reflection error: {e}")
715
+
716
+ return {"overall_score": 7, "improvement_suggestions": []}
717
+
ui.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio UI setup"""
2
+ import time
3
+ import gradio as gr
4
+ from config import TITLE, DESCRIPTION, CSS, MEDSWIN_MODELS, DEFAULT_MEDICAL_MODEL
5
+ from indexing import create_or_update_index
6
+ from pipeline import stream_chat
7
+ from voice import transcribe_audio, generate_speech
8
+
9
+
10
+ def create_demo():
11
+ """Create and return Gradio demo interface"""
12
+ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
13
+ gr.HTML(TITLE)
14
+ gr.HTML(DESCRIPTION)
15
+
16
+ with gr.Row(elem_classes="main-container"):
17
+ with gr.Column(elem_classes="upload-section"):
18
+ file_upload = gr.File(
19
+ file_count="multiple",
20
+ label="Drag and Drop Files Here",
21
+ file_types=[".pdf", ".txt", ".doc", ".docx", ".md", ".json", ".xml", ".csv"],
22
+ elem_id="file-upload"
23
+ )
24
+ upload_button = gr.Button("Upload & Index", elem_classes="upload-button")
25
+ status_output = gr.Textbox(
26
+ label="Status",
27
+ placeholder="Upload files to start...",
28
+ interactive=False
29
+ )
30
+ file_info_output = gr.HTML(
31
+ label="File Information",
32
+ elem_classes="processing-info"
33
+ )
34
+ upload_button.click(
35
+ fn=create_or_update_index,
36
+ inputs=[file_upload],
37
+ outputs=[status_output, file_info_output]
38
+ )
39
+
40
+ with gr.Column(elem_classes="chatbot-container"):
41
+ chatbot = gr.Chatbot(
42
+ height=500,
43
+ placeholder="Chat with MedSwin... Type your question below.",
44
+ show_label=False,
45
+ type="messages"
46
+ )
47
+ with gr.Row(elem_classes="input-row"):
48
+ message_input = gr.Textbox(
49
+ placeholder="Type your medical question here...",
50
+ show_label=False,
51
+ container=False,
52
+ lines=1,
53
+ scale=10
54
+ )
55
+ mic_button = gr.Audio(
56
+ sources=["microphone"],
57
+ type="filepath",
58
+ label="",
59
+ show_label=False,
60
+ container=False,
61
+ scale=1
62
+ )
63
+ submit_button = gr.Button("➤", elem_classes="submit-btn", scale=1)
64
+
65
+ recording_timer = gr.Textbox(
66
+ value="",
67
+ label="",
68
+ show_label=False,
69
+ interactive=False,
70
+ visible=False,
71
+ container=False,
72
+ elem_classes="recording-timer"
73
+ )
74
+
75
+ recording_start_time = [None]
76
+
77
+ def handle_recording_start():
78
+ """Called when recording starts"""
79
+ recording_start_time[0] = time.time()
80
+ return gr.update(visible=True, value="Recording... 0s")
81
+
82
+ def handle_recording_stop(audio):
83
+ """Called when recording stops"""
84
+ recording_start_time[0] = None
85
+ if audio is None:
86
+ return gr.update(visible=False, value=""), ""
87
+ transcribed = transcribe_audio(audio)
88
+ return gr.update(visible=False, value=""), transcribed
89
+
90
+ mic_button.start_recording(
91
+ fn=handle_recording_start,
92
+ outputs=[recording_timer]
93
+ )
94
+
95
+ mic_button.stop_recording(
96
+ fn=handle_recording_stop,
97
+ inputs=[mic_button],
98
+ outputs=[recording_timer, message_input]
99
+ )
100
+
101
+ with gr.Row(visible=False) as tts_row:
102
+ tts_text = gr.Textbox(visible=False)
103
+ tts_audio = gr.Audio(label="Generated Speech", visible=False)
104
+
105
+ def generate_speech_from_chat(history):
106
+ """Extract last assistant message and generate speech"""
107
+ if not history or len(history) == 0:
108
+ return None
109
+ last_msg = history[-1]
110
+ if last_msg.get("role") == "assistant":
111
+ text = last_msg.get("content", "").replace(" 🔊", "").strip()
112
+ if text:
113
+ audio_path = generate_speech(text)
114
+ return audio_path
115
+ return None
116
+
117
+ tts_button = gr.Button("🔊 Play Response", visible=False, size="sm")
118
+
119
+ def update_tts_button(history):
120
+ if history and len(history) > 0 and history[-1].get("role") == "assistant":
121
+ return gr.update(visible=True)
122
+ return gr.update(visible=False)
123
+
124
+ chatbot.change(
125
+ fn=update_tts_button,
126
+ inputs=[chatbot],
127
+ outputs=[tts_button]
128
+ )
129
+
130
+ tts_button.click(
131
+ fn=generate_speech_from_chat,
132
+ inputs=[chatbot],
133
+ outputs=[tts_audio]
134
+ )
135
+
136
+ with gr.Accordion("⚙️ Advanced Settings", open=False):
137
+ with gr.Row():
138
+ disable_agentic_reasoning = gr.Checkbox(
139
+ value=False,
140
+ label="Disable agentic reasoning",
141
+ info="Use MedSwin model alone without agentic reasoning, RAG, or web search"
142
+ )
143
+ show_agentic_thought = gr.Button(
144
+ "Show agentic thought",
145
+ size="sm"
146
+ )
147
+ agentic_thoughts_box = gr.Textbox(
148
+ label="Agentic Thoughts",
149
+ placeholder="Internal thoughts from MedSwin and supervisor will appear here...",
150
+ lines=8,
151
+ max_lines=15,
152
+ interactive=False,
153
+ visible=False,
154
+ elem_classes="agentic-thoughts"
155
+ )
156
+ with gr.Row():
157
+ use_rag = gr.Checkbox(
158
+ value=False,
159
+ label="Enable Document RAG",
160
+ info="Answer based on uploaded documents (upload required)"
161
+ )
162
+ use_web_search = gr.Checkbox(
163
+ value=False,
164
+ label="Enable Web Search (MCP)",
165
+ info="Fetch knowledge from online medical resources"
166
+ )
167
+
168
+ medical_model = gr.Radio(
169
+ choices=list(MEDSWIN_MODELS.keys()),
170
+ value=DEFAULT_MEDICAL_MODEL,
171
+ label="Medical Model",
172
+ info="MedSwin TA (default), others download on first use"
173
+ )
174
+
175
+ system_prompt = gr.Textbox(
176
+ value="As a medical specialist, provide detailed and accurate answers based on the provided medical documents and context. Ensure all information is clinically accurate and cite sources when available.",
177
+ label="System Prompt",
178
+ lines=3
179
+ )
180
+
181
+ with gr.Tab("Generation Parameters"):
182
+ temperature = gr.Slider(
183
+ minimum=0,
184
+ maximum=1,
185
+ step=0.1,
186
+ value=0.2,
187
+ label="Temperature"
188
+ )
189
+ max_new_tokens = gr.Slider(
190
+ minimum=512,
191
+ maximum=4096,
192
+ step=128,
193
+ value=2048,
194
+ label="Max New Tokens",
195
+ info="Increased for medical models to prevent early stopping"
196
+ )
197
+ top_p = gr.Slider(
198
+ minimum=0.0,
199
+ maximum=1.0,
200
+ step=0.1,
201
+ value=0.7,
202
+ label="Top P"
203
+ )
204
+ top_k = gr.Slider(
205
+ minimum=1,
206
+ maximum=100,
207
+ step=1,
208
+ value=50,
209
+ label="Top K"
210
+ )
211
+ penalty = gr.Slider(
212
+ minimum=0.0,
213
+ maximum=2.0,
214
+ step=0.1,
215
+ value=1.2,
216
+ label="Repetition Penalty"
217
+ )
218
+
219
+ with gr.Tab("Retrieval Parameters"):
220
+ retriever_k = gr.Slider(
221
+ minimum=5,
222
+ maximum=30,
223
+ step=1,
224
+ value=15,
225
+ label="Initial Retrieval Size (Top K)"
226
+ )
227
+ merge_threshold = gr.Slider(
228
+ minimum=0.1,
229
+ maximum=0.9,
230
+ step=0.1,
231
+ value=0.5,
232
+ label="Merge Threshold (lower = more merging)"
233
+ )
234
+
235
+ show_thoughts_state = gr.State(value=False)
236
+
237
+ def toggle_thoughts_box(current_state):
238
+ """Toggle visibility of agentic thoughts box"""
239
+ new_state = not current_state
240
+ return gr.update(visible=new_state), new_state
241
+
242
+ show_agentic_thought.click(
243
+ fn=toggle_thoughts_box,
244
+ inputs=[show_thoughts_state],
245
+ outputs=[agentic_thoughts_box, show_thoughts_state]
246
+ )
247
+
248
+ submit_button.click(
249
+ fn=stream_chat,
250
+ inputs=[
251
+ message_input,
252
+ chatbot,
253
+ system_prompt,
254
+ temperature,
255
+ max_new_tokens,
256
+ top_p,
257
+ top_k,
258
+ penalty,
259
+ retriever_k,
260
+ merge_threshold,
261
+ use_rag,
262
+ medical_model,
263
+ use_web_search,
264
+ disable_agentic_reasoning,
265
+ show_thoughts_state
266
+ ],
267
+ outputs=[chatbot, agentic_thoughts_box]
268
+ )
269
+
270
+ message_input.submit(
271
+ fn=stream_chat,
272
+ inputs=[
273
+ message_input,
274
+ chatbot,
275
+ system_prompt,
276
+ temperature,
277
+ max_new_tokens,
278
+ top_p,
279
+ top_k,
280
+ penalty,
281
+ retriever_k,
282
+ merge_threshold,
283
+ use_rag,
284
+ medical_model,
285
+ use_web_search,
286
+ disable_agentic_reasoning,
287
+ show_thoughts_state
288
+ ],
289
+ outputs=[chatbot, agentic_thoughts_box]
290
+ )
291
+
292
+ return demo
293
+
utils.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for translation, language detection, and formatting"""
2
+ import asyncio
3
+ from langdetect import detect, LangDetectException
4
+ from logger import logger
5
+ from mcp import MCP_AVAILABLE, call_agent
6
+ from config import GEMINI_MODEL_LITE
7
+
8
+ try:
9
+ import nest_asyncio
10
+ except ImportError:
11
+ nest_asyncio = None
12
+
13
+
14
+ def format_prompt_manually(messages: list, tokenizer) -> str:
15
+ """Manually format prompt for models without chat template"""
16
+ system_content = ""
17
+ user_content = ""
18
+
19
+ for msg in messages:
20
+ role = msg.get("role", "user")
21
+ content = msg.get("content", "")
22
+
23
+ if role == "system":
24
+ system_content = content
25
+ elif role == "user":
26
+ user_content = content
27
+
28
+ if system_content:
29
+ prompt = f"{system_content}\n\nQuestion: {user_content}\n\nAnswer:"
30
+ else:
31
+ prompt = f"Question: {user_content}\n\nAnswer:"
32
+
33
+ return prompt
34
+
35
+
36
+ def detect_language(text: str) -> str:
37
+ """Detect language of input text"""
38
+ try:
39
+ lang = detect(text)
40
+ return lang
41
+ except LangDetectException:
42
+ return "en"
43
+
44
+
45
+ def format_url_as_domain(url: str) -> str:
46
+ """Format URL as simple domain name (e.g., www.mayoclinic.org)"""
47
+ if not url:
48
+ return ""
49
+ try:
50
+ from urllib.parse import urlparse
51
+ parsed = urlparse(url)
52
+ domain = parsed.netloc or parsed.path
53
+ if domain.startswith('www.'):
54
+ return domain
55
+ elif domain:
56
+ return domain
57
+ return url
58
+ except Exception:
59
+ if '://' in url:
60
+ domain = url.split('://')[1].split('/')[0]
61
+ return domain
62
+ return url
63
+
64
+
65
+ async def translate_text_gemini(text: str, target_lang: str = "en", source_lang: str = None) -> str:
66
+ """Translate text using Gemini MCP"""
67
+ if source_lang:
68
+ user_prompt = f"Translate the following {source_lang} text to {target_lang}. Only provide the translation, no explanations:\n\n{text}"
69
+ else:
70
+ user_prompt = f"Translate the following text to {target_lang}. Only provide the translation, no explanations:\n\n{text}"
71
+
72
+ system_prompt = "You are a professional translator. Translate accurately and concisely."
73
+
74
+ result = await call_agent(
75
+ user_prompt=user_prompt,
76
+ system_prompt=system_prompt,
77
+ model=GEMINI_MODEL_LITE,
78
+ temperature=0.2
79
+ )
80
+
81
+ return result.strip()
82
+
83
+
84
+ def translate_text(text: str, target_lang: str = "en", source_lang: str = None) -> str:
85
+ """Translate text using Gemini MCP"""
86
+ if not MCP_AVAILABLE:
87
+ logger.warning("Gemini MCP not available for translation")
88
+ return text
89
+
90
+ try:
91
+ loop = asyncio.get_event_loop()
92
+ if loop.is_running():
93
+ if nest_asyncio:
94
+ translated = nest_asyncio.run(translate_text_gemini(text, target_lang, source_lang))
95
+ if translated:
96
+ logger.info(f"Translated via Gemini MCP: {translated[:50]}...")
97
+ return translated
98
+ else:
99
+ logger.error("Error in nested async translation: nest_asyncio not available")
100
+ else:
101
+ translated = loop.run_until_complete(translate_text_gemini(text, target_lang, source_lang))
102
+ if translated:
103
+ logger.info(f"Translated via Gemini MCP: {translated[:50]}...")
104
+ return translated
105
+ except Exception as e:
106
+ logger.error(f"Gemini MCP translation error: {e}")
107
+
108
+ return text
109
+
voice.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Audio transcription and text-to-speech functions"""
2
+ import os
3
+ import asyncio
4
+ import tempfile
5
+ import soundfile as sf
6
+ from logger import logger
7
+ from mcp import MCP_AVAILABLE, call_agent, get_mcp_session, get_cached_mcp_tools
8
+ import config
9
+ from models import TTS_AVAILABLE, initialize_tts_model
10
+
11
+ try:
12
+ import nest_asyncio
13
+ except ImportError:
14
+ nest_asyncio = None
15
+
16
+
17
+ async def transcribe_audio_gemini(audio_path: str) -> str:
18
+ """Transcribe audio using Gemini MCP"""
19
+ if not MCP_AVAILABLE:
20
+ return ""
21
+
22
+ try:
23
+ audio_path_abs = os.path.abspath(audio_path)
24
+ files = [{"path": audio_path_abs}]
25
+
26
+ system_prompt = "You are a professional transcription service. Provide accurate, well-formatted transcripts."
27
+ user_prompt = "Please transcribe this audio file. Include speaker identification if multiple speakers are present, and format it with proper punctuation and paragraphs, remove mumble, ignore non-verbal noises."
28
+
29
+ result = await call_agent(
30
+ user_prompt=user_prompt,
31
+ system_prompt=system_prompt,
32
+ files=files,
33
+ model=config.GEMINI_MODEL_LITE,
34
+ temperature=0.2
35
+ )
36
+
37
+ return result.strip()
38
+ except Exception as e:
39
+ logger.error(f"Gemini transcription error: {e}")
40
+ return ""
41
+
42
+
43
+ def transcribe_audio(audio):
44
+ """Transcribe audio to text using Gemini MCP"""
45
+ if audio is None:
46
+ return ""
47
+
48
+ try:
49
+ if isinstance(audio, str):
50
+ audio_path = audio
51
+ elif isinstance(audio, tuple):
52
+ sample_rate, audio_data = audio
53
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
54
+ sf.write(tmp_file.name, audio_data, samplerate=sample_rate)
55
+ audio_path = tmp_file.name
56
+ else:
57
+ audio_path = audio
58
+
59
+ if MCP_AVAILABLE:
60
+ try:
61
+ loop = asyncio.get_event_loop()
62
+ if loop.is_running():
63
+ if nest_asyncio:
64
+ transcribed = nest_asyncio.run(transcribe_audio_gemini(audio_path))
65
+ if transcribed:
66
+ logger.info(f"Transcribed via Gemini MCP: {transcribed[:50]}...")
67
+ return transcribed
68
+ else:
69
+ logger.error("nest_asyncio not available for nested async transcription")
70
+ else:
71
+ transcribed = loop.run_until_complete(transcribe_audio_gemini(audio_path))
72
+ if transcribed:
73
+ logger.info(f"Transcribed via Gemini MCP: {transcribed[:50]}...")
74
+ return transcribed
75
+ except Exception as e:
76
+ logger.error(f"Gemini MCP transcription error: {e}")
77
+
78
+ logger.warning("Gemini MCP transcription not available")
79
+ return ""
80
+ except Exception as e:
81
+ logger.error(f"Transcription error: {e}")
82
+ return ""
83
+
84
+
85
+ async def generate_speech_mcp(text: str) -> str:
86
+ """Generate speech using MCP TTS tool"""
87
+ if not MCP_AVAILABLE:
88
+ return None
89
+
90
+ try:
91
+ session = await get_mcp_session()
92
+ if session is None:
93
+ return None
94
+
95
+ tools = await get_cached_mcp_tools()
96
+ tts_tool = None
97
+ for tool in tools:
98
+ tool_name_lower = tool.name.lower()
99
+ if "tts" in tool_name_lower or "speech" in tool_name_lower or "synthesize" in tool_name_lower:
100
+ tts_tool = tool
101
+ logger.info(f"Found MCP TTS tool: {tool.name}")
102
+ break
103
+
104
+ if tts_tool:
105
+ result = await session.call_tool(
106
+ tts_tool.name,
107
+ arguments={"text": text, "language": "en"}
108
+ )
109
+
110
+ if hasattr(result, 'content') and result.content:
111
+ for item in result.content:
112
+ if hasattr(item, 'text'):
113
+ if os.path.exists(item.text):
114
+ return item.text
115
+ elif hasattr(item, 'data') and item.data:
116
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
117
+ tmp_file.write(item.data)
118
+ return tmp_file.name
119
+ return None
120
+ except Exception as e:
121
+ logger.warning(f"MCP TTS error: {e}")
122
+ return None
123
+
124
+
125
+ def generate_speech(text: str):
126
+ """Generate speech from text using TTS model (with MCP fallback)"""
127
+ if not text or len(text.strip()) == 0:
128
+ return None
129
+
130
+ if MCP_AVAILABLE:
131
+ try:
132
+ loop = asyncio.get_event_loop()
133
+ if loop.is_running():
134
+ if nest_asyncio:
135
+ audio_path = nest_asyncio.run(generate_speech_mcp(text))
136
+ if audio_path:
137
+ logger.info("Generated speech via MCP")
138
+ return audio_path
139
+ else:
140
+ audio_path = loop.run_until_complete(generate_speech_mcp(text))
141
+ if audio_path:
142
+ return audio_path
143
+ except Exception as e:
144
+ pass
145
+
146
+ if not TTS_AVAILABLE:
147
+ logger.error("TTS library not installed. Please install TTS to use voice generation.")
148
+ return None
149
+
150
+ if config.global_tts_model is None:
151
+ initialize_tts_model()
152
+
153
+ if config.global_tts_model is None:
154
+ logger.error("TTS model not available. Please check dependencies.")
155
+ return None
156
+
157
+ try:
158
+ wav = config.global_tts_model.tts(text)
159
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
160
+ sf.write(tmp_file.name, wav, samplerate=22050)
161
+ return tmp_file.name
162
+ except Exception as e:
163
+ logger.error(f"TTS error: {e}")
164
+ return None
165
+