Y Phung Nguyen committed on
Commit
63e92ef
·
1 Parent(s): daa4c4c

Upd gpu task ini

Browse files
Files changed (2) hide show
  1. app.py +10 -15
  2. models.py +8 -4
app.py CHANGED
@@ -10,20 +10,15 @@ from client import MCP_AVAILABLE
10
  from ui import create_demo
11
 
12
  if __name__ == "__main__":
13
- # Preload models on startup
14
- logger.info("Preloading models on startup...")
15
- logger.info("Initializing default medical model (MedSwin TA)...")
16
- initialize_medical_model(DEFAULT_MEDICAL_MODEL)
17
- logger.info("Preloading TTS model...")
18
- try:
19
- initialize_tts_model()
20
- if config.global_tts_model is not None:
21
- logger.info("TTS model preloaded successfully!")
22
- else:
23
- logger.warning("TTS model not available - will use MCP or disable voice generation")
24
- except Exception as e:
25
- logger.warning(f"TTS model preloading failed: {e}")
26
- logger.warning("Text-to-speech will use MCP or be disabled")
27
 
28
  # Check Gemini MCP availability
29
  if MCP_AVAILABLE:
@@ -57,6 +52,6 @@ if __name__ == "__main__":
57
  logger.info("ℹ️ Gemini MCP SDK not available - app will use fallback methods (direct API calls)")
58
  logger.info(" This is normal and the app will continue to work. MCP is optional.")
59
 
60
- logger.info("Model preloading complete!")
61
  demo = create_demo()
62
  demo.launch()
 
10
  from ui import create_demo
11
 
12
  if __name__ == "__main__":
13
+ # Note: Models are loaded on-demand when first needed (lazy loading)
14
+ # This avoids CUDA initialization in the main process, which is not allowed
15
+ # in ZeroGPU's stateless environment. Models will be loaded when stream_chat
16
+ # is called (which has the GPU decorator).
17
+ logger.info("App starting - models will be loaded on-demand when first needed")
18
+ logger.info(f"Default medical model: {DEFAULT_MEDICAL_MODEL}")
19
+
20
+ # TTS model also uses GPU decorator, so skip preloading
21
+ logger.info("TTS model will be loaded on-demand if needed")
 
 
 
 
 
22
 
23
  # Check Gemini MCP availability
24
  if MCP_AVAILABLE:
 
52
  logger.info("ℹ️ Gemini MCP SDK not available - app will use fallback methods (direct API calls)")
53
  logger.info(" This is normal and the app will continue to work. MCP is optional.")
54
 
55
+ logger.info("App initialization complete!")
56
  demo = create_demo()
57
  demo.launch()
models.py CHANGED
@@ -19,16 +19,20 @@ except ImportError:
19
  _model_loading_states = {}
20
  _model_loading_lock = threading.Lock()
21
 
22
- @spaces.GPU(max_duration=120)
23
  def set_model_loading_state(model_name: str, state: str):
24
- """Set model loading state: 'loading', 'loaded', 'error'"""
 
 
 
25
  with _model_loading_lock:
26
  _model_loading_states[model_name] = state
27
  logger.debug(f"Model {model_name} state set to: {state}")
28
 
29
- @spaces.GPU(max_duration=120)
30
  def get_model_loading_state(model_name: str) -> str:
31
- """Get model loading state: 'loading', 'loaded', 'error', or 'unknown'"""
 
 
 
32
  with _model_loading_lock:
33
  return _model_loading_states.get(model_name, "unknown")
34
 
 
19
  _model_loading_states = {}
20
  _model_loading_lock = threading.Lock()
21
 
 
22
  def set_model_loading_state(model_name: str, state: str):
23
+ """
24
+ Set model loading state: 'loading', 'loaded', 'error'
25
+ Note: No GPU decorator needed - this just sets a dictionary value, no GPU access required.
26
+ """
27
  with _model_loading_lock:
28
  _model_loading_states[model_name] = state
29
  logger.debug(f"Model {model_name} state set to: {state}")
30
 
 
31
  def get_model_loading_state(model_name: str) -> str:
32
+ """
33
+ Get model loading state: 'loading', 'loaded', 'error', or 'unknown'
34
+ Note: No GPU decorator needed - this just reads a dictionary value, no GPU access required.
35
+ """
36
  with _model_loading_lock:
37
  return _model_loading_states.get(model_name, "unknown")
38