Y Phung Nguyen committed on
Commit
63e92ef
·
1 Parent(s): daa4c4c

Upd gpu task ini

Browse files
Files changed (2) hide show
  1. app.py +10 -15
  2. models.py +8 -4
app.py CHANGED
@@ -10,20 +10,15 @@ from client import MCP_AVAILABLE
10
  from ui import create_demo
11
 
12
  if __name__ == "__main__":
13
- # Preload models on startup
14
- logger.info("Preloading models on startup...")
15
- logger.info("Initializing default medical model (MedSwin TA)...")
16
- initialize_medical_model(DEFAULT_MEDICAL_MODEL)
17
- logger.info("Preloading TTS model...")
18
- try:
19
- initialize_tts_model()
20
- if config.global_tts_model is not None:
21
- logger.info("TTS model preloaded successfully!")
22
- else:
23
- logger.warning("TTS model not available - will use MCP or disable voice generation")
24
- except Exception as e:
25
- logger.warning(f"TTS model preloading failed: {e}")
26
- logger.warning("Text-to-speech will use MCP or be disabled")
27
 
28
  # Check Gemini MCP availability
29
  if MCP_AVAILABLE:
@@ -57,6 +52,6 @@ if __name__ == "__main__":
57
  logger.info("ℹ️ Gemini MCP SDK not available - app will use fallback methods (direct API calls)")
58
  logger.info(" This is normal and the app will continue to work. MCP is optional.")
59
 
60
- logger.info("Model preloading complete!")
61
  demo = create_demo()
62
  demo.launch()
 
10
  from ui import create_demo
11
 
12
  if __name__ == "__main__":
13
+ # Note: Models are loaded on-demand when first needed (lazy loading)
14
+ # This avoids CUDA initialization in the main process, which is not allowed
15
+ # in ZeroGPU's stateless environment. Models will be loaded when stream_chat
16
+ # is called (which has the GPU decorator).
17
+ logger.info("App starting - models will be loaded on-demand when first needed")
18
+ logger.info(f"Default medical model: {DEFAULT_MEDICAL_MODEL}")
19
+
20
+ # TTS model also uses GPU decorator, so skip preloading
21
+ logger.info("TTS model will be loaded on-demand if needed")
 
 
 
 
 
22
 
23
  # Check Gemini MCP availability
24
  if MCP_AVAILABLE:
 
52
  logger.info("ℹ️ Gemini MCP SDK not available - app will use fallback methods (direct API calls)")
53
  logger.info(" This is normal and the app will continue to work. MCP is optional.")
54
 
55
+ logger.info("App initialization complete!")
56
  demo = create_demo()
57
  demo.launch()
models.py CHANGED
@@ -19,16 +19,20 @@ except ImportError:
19
  _model_loading_states = {}
20
  _model_loading_lock = threading.Lock()
21
 
22
- @spaces.GPU(max_duration=120)
23
  def set_model_loading_state(model_name: str, state: str):
24
- """Set model loading state: 'loading', 'loaded', 'error'"""
 
 
 
25
  with _model_loading_lock:
26
  _model_loading_states[model_name] = state
27
  logger.debug(f"Model {model_name} state set to: {state}")
28
 
29
- @spaces.GPU(max_duration=120)
30
  def get_model_loading_state(model_name: str) -> str:
31
- """Get model loading state: 'loading', 'loaded', 'error', or 'unknown'"""
 
 
 
32
  with _model_loading_lock:
33
  return _model_loading_states.get(model_name, "unknown")
34
 
 
19
  _model_loading_states = {}
20
  _model_loading_lock = threading.Lock()
21
 
 
22
  def set_model_loading_state(model_name: str, state: str):
23
+ """
24
+ Set model loading state: 'loading', 'loaded', 'error'
25
+ Note: No GPU decorator needed - this just sets a dictionary value, no GPU access required.
26
+ """
27
  with _model_loading_lock:
28
  _model_loading_states[model_name] = state
29
  logger.debug(f"Model {model_name} state set to: {state}")
30
 
 
31
  def get_model_loading_state(model_name: str) -> str:
32
+ """
33
+ Get model loading state: 'loading', 'loaded', 'error', or 'unknown'
34
+ Note: No GPU decorator needed - this just reads a dictionary value, no GPU access required.
35
+ """
36
  with _model_loading_lock:
37
  return _model_loading_states.get(model_name, "unknown")
38