Y Phung Nguyen committed · Commit c11b620 · 1 Parent(s): 4a5418d

Fix model preloader
models.py CHANGED

@@ -77,11 +77,17 @@ def initialize_medical_model(model_name: str):
             token=config.HF_TOKEN,
             torch_dtype=torch.float16
         )
+        # Set models in config BEFORE setting state to "loaded"
         config.global_medical_models[model_name] = model
         config.global_medical_tokenizers[model_name] = tokenizer
+        # Set state to "loaded" AFTER models are stored
         set_model_loading_state(model_name, "loaded")
         logger.info(f"Medical model {model_name} initialized successfully")

+        # Verify the state was set correctly
+        if not is_model_loaded(model_name):
+            logger.warning(f"Model {model_name} initialized but is_model_loaded() returns False. State: {get_model_loading_state(model_name)}, in dict: {model_name in config.global_medical_models}")
+
         # Clear cache after loading to free up temporary memory
         if torch.cuda.is_available():
             torch.cuda.empty_cache()

@@ -96,6 +102,7 @@ def initialize_medical_model(model_name: str):
     else:
         # Model already loaded, ensure state is set
         if get_model_loading_state(model_name) != "loaded":
+            logger.info(f"Model {model_name} exists in config but state not set to 'loaded'. Setting state now.")
             set_model_loading_state(model_name, "loaded")
     return config.global_medical_models[model_name], config.global_medical_tokenizers[model_name]
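The BEFORE/AFTER comments above only matter if is_model_loaded() checks more than a state flag. The commit does not show the helper definitions, but a minimal sketch of how such helpers are typically backed might look like the following; only the function names come from the diff, while the dict-and-lock storage and the exact state strings are assumptions:

import threading

import config  # the Space's own config module, as referenced in models.py

# Hypothetical backing store; the real helpers live elsewhere in this repo.
_model_states = {}
_state_lock = threading.Lock()

def set_model_loading_state(model_name: str, state: str) -> None:
    # Assumed states: "not_loaded" -> "loading" -> "loaded" (or "error")
    with _state_lock:
        _model_states[model_name] = state

def get_model_loading_state(model_name: str) -> str:
    with _state_lock:
        return _model_states.get(model_name, "not_loaded")

def is_model_loaded(model_name: str) -> bool:
    # If this checks both the state flag and the model dict, the ordering in
    # the diff is what prevents another thread from observing "loaded" before
    # the model is actually stored in config.global_medical_models.
    return (
        get_model_loading_state(model_name) == "loaded"
        and model_name in config.global_medical_models
    )

Under that assumption, storing the model first and flipping the flag second makes the "loaded" state safe to act on from the UI thread, which is what the preloader fix needs.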
ui.py CHANGED

@@ -696,8 +696,7 @@ def create_demo():
             preload_model_on_input_focus()
         except Exception as e:
             logger.debug(f"[PRELOAD] Pre-load trigger error (non-critical): {e}")
-        #
-        return ""
+        # Don't return anything - outputs=None means no return value expected

     # Trigger model pre-loading when user focuses on message input
     message_input.focus(

@@ -714,8 +713,12 @@ def create_demo():
         enable_clinical_intake, disable_agentic_reasoning, show_thoughts, request: gr.Request = None
     ):
         # Check if model is loaded - if not, show error (don't load here to save stream_chat time)
-
+        model_loaded = is_model_loaded(medical_model_name)
+        if not model_loaded:
         loading_state = get_model_loading_state(medical_model_name)
+            # Debug logging to understand why model check fails
+            logger.debug(f"[STREAM_CHAT] Model check: name={medical_model_name}, loaded={model_loaded}, state={loading_state}, in_dict={medical_model_name in config.global_medical_models}, model_exists={config.global_medical_models.get(medical_model_name) is not None if medical_model_name in config.global_medical_models else False}")
+
         if loading_state == "loading":
             error_msg = f"⏳ {medical_model_name} is still loading. Please wait until the model status shows 'loaded and ready' before sending messages."
         else:
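For context, the focus-triggered preload that these ui.py hunks adjust can be wired up roughly as below. The component and function names follow the diff; the surrounding Blocks scaffolding and the stub preloader are illustrative only:

import logging

import gradio as gr

logger = logging.getLogger(__name__)

def preload_model_on_input_focus():
    # Stand-in for the real preloader in this repo, which kicks off
    # initialize_medical_model() in the background.
    pass

def handle_focus():
    try:
        preload_model_on_input_focus()
    except Exception as e:
        logger.debug(f"[PRELOAD] Pre-load trigger error (non-critical): {e}")
    # No return statement: the event is registered with outputs=None, so
    # Gradio expects no return value. Returning "" here (as before this
    # commit) would mismatch the declared outputs.

with gr.Blocks() as demo:
    message_input = gr.Textbox(label="Message")
    # .focus() fires when the user clicks into the textbox, warming the
    # model before the first chat request arrives.
    message_input.focus(fn=handle_focus, inputs=None, outputs=None)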