Spaces:

MCP-1st-Birthday
/

MedLLM-Agent

Running on Zero

App Files Files Community

Y Phung Nguyen committed 21 days ago

Commit

a5fe328

1 Parent(s): c5ac360

Upd models loader #2

Browse files

Files changed (2) hide show

models.py +39 -0
ui.py +86 -65

models.py CHANGED Viewed

@@ -63,6 +63,11 @@ def initialize_medical_model(model_name: str):
         set_model_loading_state(model_name, "loading")
         logger.info(f"Initializing medical model: {model_name}...")
         try:
             model_path = config.MEDSWIN_MODELS[model_name]
             tokenizer = AutoTokenizer.from_pretrained(model_path, token=config.HF_TOKEN)
             model = AutoModelForCausalLM.from_pretrained(
@@ -76,9 +81,17 @@ def initialize_medical_model(model_name: str):
             config.global_medical_tokenizers[model_name] = tokenizer
             set_model_loading_state(model_name, "loaded")
             logger.info(f"Medical model {model_name} initialized successfully")
         except Exception as e:
             set_model_loading_state(model_name, "error")
             logger.error(f"Failed to initialize medical model {model_name}: {e}")
             raise
     else:
         # Model already loaded, ensure state is set
@@ -93,13 +106,26 @@ def initialize_tts_model():
         return None
     if config.global_tts_model is None:
         try:
             logger.info("Initializing TTS model for voice generation...")
             config.global_tts_model = TTS(model_name=config.TTS_MODEL, progress_bar=False)
             logger.info("TTS model initialized successfully")
         except Exception as e:
             logger.warning(f"TTS model initialization failed: {e}")
             logger.warning("TTS features will be disabled. If pyworld dependency is missing, try: pip install TTS --no-deps && pip install coqui-tts")
             config.global_tts_model = None
     return config.global_tts_model
 def initialize_whisper_model():
@@ -109,6 +135,11 @@ def initialize_whisper_model():
         return None
     if config.global_whisper_model is None:
         try:
             logger.info("Initializing Whisper model (openai/whisper-large-v3-turbo) from Hugging Face...")
             model_id = "openai/whisper-large-v3-turbo"
             processor = WhisperProcessor.from_pretrained(model_id, token=config.HF_TOKEN)
@@ -121,10 +152,18 @@ def initialize_whisper_model():
             # Store both processor and model
             config.global_whisper_model = {"processor": processor, "model": model}
             logger.info(f"Whisper model ({model_id}) initialized successfully")
         except Exception as e:
             logger.warning(f"Whisper model initialization failed: {e}")
             logger.warning("ASR features will be disabled. Install with: pip install transformers torchaudio")
             config.global_whisper_model = None
     return config.global_whisper_model
 def get_or_create_embed_model():

         set_model_loading_state(model_name, "loading")
         logger.info(f"Initializing medical model: {model_name}...")
         try:
+            # Clear GPU cache before loading to prevent memory issues
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                logger.debug("Cleared GPU cache before model loading")
             model_path = config.MEDSWIN_MODELS[model_name]
             tokenizer = AutoTokenizer.from_pretrained(model_path, token=config.HF_TOKEN)
             model = AutoModelForCausalLM.from_pretrained(
             config.global_medical_tokenizers[model_name] = tokenizer
             set_model_loading_state(model_name, "loaded")
             logger.info(f"Medical model {model_name} initialized successfully")
+            # Clear cache after loading to free up temporary memory
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                logger.debug("Cleared GPU cache after model loading")
         except Exception as e:
             set_model_loading_state(model_name, "error")
             logger.error(f"Failed to initialize medical model {model_name}: {e}")
+            # Clear cache on error
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
             raise
     else:
         # Model already loaded, ensure state is set
         return None
     if config.global_tts_model is None:
         try:
+            # Clear GPU cache before loading
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                logger.debug("Cleared GPU cache before TTS model loading")
             logger.info("Initializing TTS model for voice generation...")
             config.global_tts_model = TTS(model_name=config.TTS_MODEL, progress_bar=False)
             logger.info("TTS model initialized successfully")
+            # Clear cache after loading
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                logger.debug("Cleared GPU cache after TTS model loading")
         except Exception as e:
             logger.warning(f"TTS model initialization failed: {e}")
             logger.warning("TTS features will be disabled. If pyworld dependency is missing, try: pip install TTS --no-deps && pip install coqui-tts")
             config.global_tts_model = None
+            # Clear cache on error
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
     return config.global_tts_model
 def initialize_whisper_model():
         return None
     if config.global_whisper_model is None:
         try:
+            # Clear GPU cache before loading
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                logger.debug("Cleared GPU cache before Whisper model loading")
             logger.info("Initializing Whisper model (openai/whisper-large-v3-turbo) from Hugging Face...")
             model_id = "openai/whisper-large-v3-turbo"
             processor = WhisperProcessor.from_pretrained(model_id, token=config.HF_TOKEN)
             # Store both processor and model
             config.global_whisper_model = {"processor": processor, "model": model}
             logger.info(f"Whisper model ({model_id}) initialized successfully")
+            # Clear cache after loading
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                logger.debug("Cleared GPU cache after Whisper model loading")
         except Exception as e:
             logger.warning(f"Whisper model initialization failed: {e}")
             logger.warning("ASR features will be disabled. Install with: pip install transformers torchaudio")
             config.global_whisper_model = None
+            # Clear cache on error
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
     return config.global_whisper_model
 def get_or_create_embed_model():

ui.py CHANGED Viewed

@@ -290,7 +290,7 @@ def create_demo():
                 )
                 # GPU-decorated function to load any model (for user selection)
-                @spaces.GPU(max_duration=120)
                 def load_model_with_gpu(model_name):
                     """Load medical model (GPU-decorated for ZeroGPU compatibility)"""
                     try:
@@ -404,59 +404,103 @@ def create_demo():
                     is_ready = is_model_loaded(model_name)
                     return status_text, is_ready
-                # GPU-decorated function to load model on startup
-                @spaces.GPU(max_duration=120)
-                def load_default_model_on_startup():
-                    """Load default medical model on startup (GPU-decorated for ZeroGPU compatibility)"""
                     try:
                         if not is_model_loaded(DEFAULT_MEDICAL_MODEL):
-                            logger.info(f"Loading default medical model on startup: {DEFAULT_MEDICAL_MODEL}...")
                             set_model_loading_state(DEFAULT_MEDICAL_MODEL, "loading")
                             try:
                                 initialize_medical_model(DEFAULT_MEDICAL_MODEL)
-                                logger.info(f"✅ Default medical model {DEFAULT_MEDICAL_MODEL} loaded successfully on startup!")
-                                return f"✅ {DEFAULT_MEDICAL_MODEL} loaded successfully"
                             except Exception as e:
-                                logger.error(f"Failed to load default medical model on startup: {e}")
                                 set_model_loading_state(DEFAULT_MEDICAL_MODEL, "error")
-                                return f"❌ Error loading model: {str(e)[:100]}"
                         else:
-                            logger.info(f"Default medical model {DEFAULT_MEDICAL_MODEL} is already loaded")
-                            return f"✅ {DEFAULT_MEDICAL_MODEL} is ready"
-                    except Exception as e:
-                        logger.error(f"Error in model loading startup: {e}")
-                        return f"⚠️ Startup loading error: {str(e)[:100]}"
-                # GPU-decorated function to load default TTS and ASR models on startup
-                @spaces.GPU(max_duration=120)
-                def load_voice_models_on_startup():
-                    """Load default TTS model (maya1) and ASR model (Whisper) on startup"""
-                    try:
-                        # Load TTS model
                         if TTS_AVAILABLE:
-                            logger.info("Loading default TTS model (maya1) on startup...")
-                            initialize_tts_model()
-                            if config.global_tts_model is not None:
-                                logger.info("✅ Default TTS model (maya1) loaded successfully on startup!")
-                            else:
-                                logger.warning("⚠️ TTS model failed to load on startup")
                         else:
-                            logger.warning("TTS library not installed; skipping TTS preload.")
-                        # Load ASR (Whisper) model
                         if WHISPER_AVAILABLE:
-                            logger.info("Loading default ASR model (Whisper large-v3-turbo) on startup...")
-                            initialize_whisper_model()
-                            if config.global_whisper_model is not None:
-                                logger.info("✅ Default ASR model (Whisper large-v3-turbo) loaded successfully on startup!")
-                            else:
-                                logger.warning("⚠️ ASR model failed to load on startup")
                         else:
-                            logger.warning("Whisper transformers not installed; skipping ASR preload.")
                     except Exception as e:
-                        logger.error(f"Error in voice models loading startup: {e}")
                         import traceback
-                        logger.debug(f"Full traceback: {traceback.format_exc()}")
                 # Initialize status on load
                 def init_model_status():
@@ -522,33 +566,10 @@ def create_demo():
                     outputs=[model_status, submit_button, message_input]
                 )
-                # Load models on startup - they will be loaded in separate GPU sessions
-                # First load medical model
-                demo.load(
-                    fn=load_default_model_on_startup,
-                    inputs=None,
-                    outputs=[model_status]
-                )
-                # Then load voice models (TTS and ASR)
-                demo.load(
-                    fn=load_voice_models_on_startup,
-                    inputs=None,
-                    outputs=None
-                )
-                # Finally update status to show all models
-                def update_status_after_load():
-                    try:
-                        result = check_model_status(DEFAULT_MEDICAL_MODEL)
-                        if result and isinstance(result, tuple) and len(result) == 2:
-                            return result[0]
-                        else:
-                            return "⚠️ Unable to check model status"
-                    except Exception as e:
-                        logger.error(f"Error updating status after load: {e}")
-                        return f"⚠️ Error: {str(e)[:100]}"
                 demo.load(
-                    fn=update_status_after_load,
                     inputs=None,
                     outputs=[model_status]
                 )

                 )
                 # GPU-decorated function to load any model (for user selection)
+                # @spaces.GPU(max_duration=120)
                 def load_model_with_gpu(model_name):
                     """Load medical model (GPU-decorated for ZeroGPU compatibility)"""
                     try:
                     is_ready = is_model_loaded(model_name)
                     return status_text, is_ready
+                # GPU-decorated function to load ALL models sequentially on startup
+                # This prevents ZeroGPU conflicts from multiple simultaneous GPU requests
+                # @spaces.GPU(max_duration=180)
+                def load_all_models_on_startup():
+                    """Load all models sequentially in a single GPU session to avoid ZeroGPU conflicts"""
+                    import time
+                    import torch
+                    status_messages = []
                     try:
+                        # Clear GPU cache at start
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                            logger.info("[STARTUP] Cleared GPU cache before model loading")
+                        # Step 1: Load medical model (MedSwin)
                         if not is_model_loaded(DEFAULT_MEDICAL_MODEL):
+                            logger.info(f"[STARTUP] Step 1/3: Loading medical model: {DEFAULT_MEDICAL_MODEL}...")
                             set_model_loading_state(DEFAULT_MEDICAL_MODEL, "loading")
                             try:
                                 initialize_medical_model(DEFAULT_MEDICAL_MODEL)
+                                status_messages.append(f"✅ MedSwin ({DEFAULT_MEDICAL_MODEL}): loaded")
+                                logger.info(f"[STARTUP] ✅ Medical model {DEFAULT_MEDICAL_MODEL} loaded successfully!")
                             except Exception as e:
+                                status_messages.append(f"❌ MedSwin ({DEFAULT_MEDICAL_MODEL}): error")
+                                logger.error(f"[STARTUP] Failed to load medical model: {e}")
                                 set_model_loading_state(DEFAULT_MEDICAL_MODEL, "error")
                         else:
+                            status_messages.append(f"✅ MedSwin ({DEFAULT_MEDICAL_MODEL}): already loaded")
+                            logger.info(f"[STARTUP] Medical model {DEFAULT_MEDICAL_MODEL} already loaded")
+                        # Small delay to let GPU settle and clear cache
+                        time.sleep(2)
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                            logger.debug("[STARTUP] Cleared GPU cache after medical model")
+                        # Step 2: Load TTS model (maya1)
                         if TTS_AVAILABLE:
+                            logger.info("[STARTUP] Step 2/3: Loading TTS model (maya1)...")
+                            try:
+                                initialize_tts_model()
+                                if config.global_tts_model is not None:
+                                    status_messages.append("✅ TTS (maya1): loaded")
+                                    logger.info("[STARTUP] ✅ TTS model loaded successfully!")
+                                else:
+                                    status_messages.append("⚠️ TTS (maya1): failed")
+                                    logger.warning("[STARTUP] ⚠️ TTS model failed to load")
+                            except Exception as e:
+                                status_messages.append("❌ TTS (maya1): error")
+                                logger.error(f"[STARTUP] TTS model loading error: {e}")
                         else:
+                            status_messages.append("❌ TTS: library not available")
+                            logger.warning("[STARTUP] TTS library not installed")
+                        # Small delay to let GPU settle and clear cache
+                        time.sleep(2)
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                            logger.debug("[STARTUP] Cleared GPU cache after TTS model")
+                        # Step 3: Load ASR model (Whisper)
                         if WHISPER_AVAILABLE:
+                            logger.info("[STARTUP] Step 3/3: Loading ASR model (Whisper)...")
+                            try:
+                                initialize_whisper_model()
+                                if config.global_whisper_model is not None:
+                                    status_messages.append("✅ ASR (Whisper): loaded")
+                                    logger.info("[STARTUP] ✅ ASR model loaded successfully!")
+                                else:
+                                    status_messages.append("⚠️ ASR (Whisper): failed")
+                                    logger.warning("[STARTUP] ⚠️ ASR model failed to load")
+                            except Exception as e:
+                                status_messages.append("❌ ASR (Whisper): error")
+                                logger.error(f"[STARTUP] ASR model loading error: {e}")
                         else:
+                            status_messages.append("❌ ASR: library not available")
+                            logger.warning("[STARTUP] Whisper library not installed")
+                        # Final cache clear
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                            logger.debug("[STARTUP] Final GPU cache clear")
+                        # Return combined status
+                        status_text = "\n".join(status_messages)
+                        logger.info(f"[STARTUP] ✅ Model loading complete. Status:\n{status_text}")
+                        return status_text
                     except Exception as e:
+                        logger.error(f"[STARTUP] ❌ Error in model loading startup: {e}")
                         import traceback
+                        logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
+                        # Clear cache on error
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                        return f"⚠️ Startup loading error: {str(e)[:100]}"
                 # Initialize status on load
                 def init_model_status():
                     outputs=[model_status, submit_button, message_input]
                 )
+                # Load ALL models sequentially in a SINGLE GPU session to avoid ZeroGPU conflicts
+                # This prevents "GPU aborted" errors from multiple simultaneous GPU requests
                 demo.load(
+                    fn=load_all_models_on_startup,
                     inputs=None,
                     outputs=[model_status]
                 )