"""Symptom → ICD-10 lookup app.

Streams microphone input through a Gradio UI and queries a LlamaIndex
symptom index, conversing turn-by-turn until a final JSON diagnosis
({"diagnoses": [...], "confidences": [...]}) can be emitted.
"""

import os

import gradio as gr
from llama_index import HuggingFaceLLMPredictor

from src.parse_tabular import symptom_index
# --- LlamaIndex utils import ---
from utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms

# --- System prompt ---
SYSTEM_PROMPT = """
You are a medical assistant helping a user narrow down to the most likely ICD-10 code.
At each turn, EITHER ask one focused clarifying question (e.g. "Is your cough dry or productive?")
or, if you have enough info, output a final JSON with fields:
{"diagnoses":[…], "confidences":[…]}.
"""


def process_speech(new_transcript, history):
    """Handle one streamed user utterance and extend the conversation.

    Args:
        new_transcript: Latest user input. Falsy values (None / "") are
            ignored and the history is returned unchanged.
        history: Gradio Chatbot history — a list of
            (user_message, assistant_message) tuples.

    Returns:
        The updated history list with the new exchange appended.
    """
    # Skip empty stream events.
    if not new_transcript:
        return history

    # Build the LLM predictor; model is configurable via the HF_MODEL env var.
    llm_predictor = HuggingFaceLLMPredictor(
        model_name_or_path=os.getenv("HF_MODEL", "gpt2-medium")
    )

    # BUG FIX: Chatbot history items are (user_msg, bot_msg) pairs, not
    # (role, msg) pairs — the original join mislabeled every turn. Rebuild
    # the transcript with explicit role labels, prefixed by SYSTEM_PROMPT
    # (which was previously defined but never used).
    turns = []
    for user_msg, bot_msg in history:
        turns.append(f"user: {user_msg}")
        turns.append(f"assistant: {bot_msg}")
    prompt = SYSTEM_PROMPT + "\n".join(turns) + f"\nuser: {new_transcript}"

    # Query the symptom index with the full conversation context.
    response = symptom_index.as_query_engine(
        llm_predictor=llm_predictor
    ).query(prompt)

    # Append the new exchange to history.
    history.append((new_transcript, response.response))
    return history


# --- Gradio interface ---
demo = gr.Blocks()

with demo:
    gr.Markdown("# Symptom to ICD-10 Code Lookup (Audio Input)")
    chatbot = gr.Chatbot(label="Conversation")
    # BUG FIX: gr.Audio accepts only type="numpy" or type="filepath";
    # type="text" raises ValueError at construction.
    # NOTE(review): process_speech expects a transcript string, so a
    # speech-to-text step is still needed between this component and the
    # callback — confirm the intended STT integration.
    audio = gr.Audio(source="microphone", type="filepath", streaming=True)

    audio.stream(
        process_speech,
        inputs=[audio, chatbot],
        outputs=chatbot,
        show_progress="hidden",
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True,
    )