Spaces:
Running
on
Zero
Running
on
Zero
Y Phung Nguyen
committed on
Commit
·
63e92ef
1
Parent(s):
daa4c4c
Upd gpu task ini
Browse files
app.py
CHANGED
|
@@ -10,20 +10,15 @@ from client import MCP_AVAILABLE
|
|
| 10 |
from ui import create_demo
|
| 11 |
|
| 12 |
if __name__ == "__main__":
|
| 13 |
-
#
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
logger.info("
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
else:
|
| 23 |
-
logger.warning("TTS model not available - will use MCP or disable voice generation")
|
| 24 |
-
except Exception as e:
|
| 25 |
-
logger.warning(f"TTS model preloading failed: {e}")
|
| 26 |
-
logger.warning("Text-to-speech will use MCP or be disabled")
|
| 27 |
|
| 28 |
# Check Gemini MCP availability
|
| 29 |
if MCP_AVAILABLE:
|
|
@@ -57,6 +52,6 @@ if __name__ == "__main__":
|
|
| 57 |
logger.info("ℹ️ Gemini MCP SDK not available - app will use fallback methods (direct API calls)")
|
| 58 |
logger.info(" This is normal and the app will continue to work. MCP is optional.")
|
| 59 |
|
| 60 |
-
logger.info("
|
| 61 |
demo = create_demo()
|
| 62 |
demo.launch()
|
|
|
|
| 10 |
from ui import create_demo
|
| 11 |
|
| 12 |
if __name__ == "__main__":
|
| 13 |
+
# Note: Models are loaded on-demand when first needed (lazy loading)
|
| 14 |
+
# This avoids CUDA initialization in the main process, which is not allowed
|
| 15 |
+
# in ZeroGPU's stateless environment. Models will be loaded when stream_chat
|
| 16 |
+
# is called (which has the GPU decorator).
|
| 17 |
+
logger.info("App starting - models will be loaded on-demand when first needed")
|
| 18 |
+
logger.info(f"Default medical model: {DEFAULT_MEDICAL_MODEL}")
|
| 19 |
+
|
| 20 |
+
# TTS model also uses GPU decorator, so skip preloading
|
| 21 |
+
logger.info("TTS model will be loaded on-demand if needed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# Check Gemini MCP availability
|
| 24 |
if MCP_AVAILABLE:
|
|
|
|
| 52 |
logger.info("ℹ️ Gemini MCP SDK not available - app will use fallback methods (direct API calls)")
|
| 53 |
logger.info(" This is normal and the app will continue to work. MCP is optional.")
|
| 54 |
|
| 55 |
+
logger.info("App initialization complete!")
|
| 56 |
demo = create_demo()
|
| 57 |
demo.launch()
|
models.py
CHANGED
|
@@ -19,16 +19,20 @@ except ImportError:
|
|
| 19 |
_model_loading_states = {}
|
| 20 |
_model_loading_lock = threading.Lock()
|
| 21 |
|
| 22 |
-
@spaces.GPU(max_duration=120)
|
| 23 |
def set_model_loading_state(model_name: str, state: str):
|
| 24 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 25 |
with _model_loading_lock:
|
| 26 |
_model_loading_states[model_name] = state
|
| 27 |
logger.debug(f"Model {model_name} state set to: {state}")
|
| 28 |
|
| 29 |
-
@spaces.GPU(max_duration=120)
|
| 30 |
def get_model_loading_state(model_name: str) -> str:
|
| 31 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 32 |
with _model_loading_lock:
|
| 33 |
return _model_loading_states.get(model_name, "unknown")
|
| 34 |
|
|
|
|
| 19 |
_model_loading_states = {}
|
| 20 |
_model_loading_lock = threading.Lock()
|
| 21 |
|
|
|
|
| 22 |
def set_model_loading_state(model_name: str, state: str):
|
| 23 |
+
"""
|
| 24 |
+
Set model loading state: 'loading', 'loaded', 'error'
|
| 25 |
+
Note: No GPU decorator needed - this just sets a dictionary value, no GPU access required.
|
| 26 |
+
"""
|
| 27 |
with _model_loading_lock:
|
| 28 |
_model_loading_states[model_name] = state
|
| 29 |
logger.debug(f"Model {model_name} state set to: {state}")
|
| 30 |
|
|
|
|
| 31 |
def get_model_loading_state(model_name: str) -> str:
|
| 32 |
+
"""
|
| 33 |
+
Get model loading state: 'loading', 'loaded', 'error', or 'unknown'
|
| 34 |
+
Note: No GPU decorator needed - this just reads a dictionary value, no GPU access required.
|
| 35 |
+
"""
|
| 36 |
with _model_loading_lock:
|
| 37 |
return _model_loading_states.get(model_name, "unknown")
|
| 38 |
|