"""Symptom → ICD-10 lookup app.

Streams microphone input through a Gradio UI and queries a LlamaIndex
symptom index, conversing turn-by-turn until a final JSON diagnosis
({"diagnoses": [...], "confidences": [...]}) can be emitted.
"""

import os

import gradio as gr
from llama_index import HuggingFaceLLMPredictor

from src.parse_tabular import symptom_index
# --- LlamaIndex utils import ---
from utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms

# --- System prompt ---
SYSTEM_PROMPT = """
You are a medical assistant helping a user narrow down to the most likely ICD-10 code.
At each turn, EITHER ask one focused clarifying question (e.g. "Is your cough dry or productive?")
or, if you have enough info, output a final JSON with fields:
{"diagnoses":[…], "confidences":[…]}.
"""


def process_speech(new_transcript, history):
    """Handle one streamed user utterance and extend the conversation.

    Args:
        new_transcript: Latest user input. Falsy values (None / "") are
            ignored and the history is returned unchanged.
        history: Gradio Chatbot history — a list of
            (user_message, assistant_message) tuples.

    Returns:
        The updated history list with the new exchange appended.
    """
    # Skip empty stream events.
    if not new_transcript:
        return history

    # Build the LLM predictor; model is configurable via the HF_MODEL env var.
    llm_predictor = HuggingFaceLLMPredictor(
        model_name_or_path=os.getenv("HF_MODEL", "gpt2-medium")
    )

    # BUG FIX: Chatbot history items are (user_msg, bot_msg) pairs, not
    # (role, msg) pairs — the original join mislabeled every turn. Rebuild
    # the transcript with explicit role labels, prefixed by SYSTEM_PROMPT
    # (which was previously defined but never used).
    turns = []
    for user_msg, bot_msg in history:
        turns.append(f"user: {user_msg}")
        turns.append(f"assistant: {bot_msg}")
    prompt = SYSTEM_PROMPT + "\n".join(turns) + f"\nuser: {new_transcript}"

    # Query the symptom index with the full conversation context.
    response = symptom_index.as_query_engine(
        llm_predictor=llm_predictor
    ).query(prompt)

    # Append the new exchange to history.
    history.append((new_transcript, response.response))
    return history


# --- Gradio interface ---
demo = gr.Blocks()

with demo:
    gr.Markdown("# Symptom to ICD-10 Code Lookup (Audio Input)")
    chatbot = gr.Chatbot(label="Conversation")
    # BUG FIX: gr.Audio accepts only type="numpy" or type="filepath";
    # type="text" raises ValueError at construction.
    # NOTE(review): process_speech expects a transcript string, so a
    # speech-to-text step is still needed between this component and the
    # callback — confirm the intended STT integration.
    audio = gr.Audio(source="microphone", type="filepath", streaming=True)

    audio.stream(
        process_speech,
        inputs=[audio, chatbot],
        outputs=chatbot,
        show_progress="hidden",
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True,
    )