Spaces:
Running
on
Zero
Running
on
Zero
Y Phung Nguyen
committed on
Commit
·
acc39fd
1
Parent(s):
020a4b5
Fix GPU quota error
Browse files
ui.py
CHANGED
|
@@ -461,12 +461,21 @@ def create_demo():
|
|
| 461 |
|
| 462 |
except Exception as e:
|
| 463 |
error_msg = str(e)
|
| 464 |
-
# Check if it's a ZeroGPU quota/rate limit error
|
| 465 |
-
|
| 466 |
"quota" in error_msg.lower() or "ZeroGPU" in error_msg or
|
| 467 |
-
"runnning out" in error_msg.lower() or "running out" in error_msg.lower())
|
|
|
|
|
|
|
| 468 |
logger.warning(f"[STARTUP] ZeroGPU quota/rate limit error detected: {error_msg[:100]}")
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
logger.error(f"[STARTUP] ❌ Error in model loading startup: {e}")
|
| 471 |
import traceback
|
| 472 |
logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
|
|
@@ -579,6 +588,20 @@ def create_demo():
|
|
| 579 |
logger.info(f"[STARTUP] ✅ Model loaded successfully on attempt {attempt}")
|
| 580 |
return status_text, gr.update(interactive=is_ready), gr.update(interactive=is_ready)
|
| 581 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
# Model didn't load, but no exception - might be a state issue
|
| 583 |
logger.warning(f"[STARTUP] Model not ready after attempt {attempt}, but no error")
|
| 584 |
if attempt < max_retries:
|
|
@@ -587,11 +610,13 @@ def create_demo():
|
|
| 587 |
time.sleep(delay)
|
| 588 |
continue
|
| 589 |
else:
|
| 590 |
-
|
|
|
|
| 591 |
except Exception as e:
|
| 592 |
error_msg = str(e)
|
| 593 |
is_quota_error = ("429" in error_msg or "Too Many Requests" in error_msg or
|
| 594 |
-
"quota" in error_msg.lower() or "ZeroGPU" in error_msg
|
|
|
|
| 595 |
|
| 596 |
if is_quota_error and attempt < max_retries:
|
| 597 |
delay = base_delay * attempt # Exponential backoff: 5s, 10s, 15s
|
|
@@ -605,16 +630,18 @@ def create_demo():
|
|
| 605 |
logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
|
| 606 |
|
| 607 |
if is_quota_error:
|
| 608 |
-
|
|
|
|
|
|
|
|
|
|
| 609 |
else:
|
| 610 |
error_display = f"⚠️ Startup error: {str(e)[:100]}"
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
return error_display, gr.update(interactive=False), gr.update(interactive=False)
|
| 615 |
|
| 616 |
# Should not reach here, but just in case
|
| 617 |
-
return "⚠️ Startup failed after retries", gr.update(interactive=
|
| 618 |
|
| 619 |
demo.load(
|
| 620 |
fn=load_startup_and_update_ui,
|
|
|
|
| 461 |
|
| 462 |
except Exception as e:
|
| 463 |
error_msg = str(e)
|
| 464 |
+
# Check if it's a ZeroGPU quota/rate limit error
|
| 465 |
+
is_quota_error = ("429" in error_msg or "Too Many Requests" in error_msg or
|
| 466 |
"quota" in error_msg.lower() or "ZeroGPU" in error_msg or
|
| 467 |
+
"runnning out" in error_msg.lower() or "running out" in error_msg.lower())
|
| 468 |
+
|
| 469 |
+
if is_quota_error:
|
| 470 |
logger.warning(f"[STARTUP] ZeroGPU quota/rate limit error detected: {error_msg[:100]}")
|
| 471 |
+
# Return status message indicating quota error (will be handled by retry logic)
|
| 472 |
+
status_messages.append("⚠️ ZeroGPU quota error - will retry")
|
| 473 |
+
status_text = "\n".join(status_messages)
|
| 474 |
+
# Also add ASR status
|
| 475 |
+
if WHISPER_AVAILABLE:
|
| 476 |
+
status_text += "\n⏳ ASR (Whisper): will load on first use"
|
| 477 |
+
return status_text # Return status instead of raising, let wrapper handle retry
|
| 478 |
+
|
| 479 |
logger.error(f"[STARTUP] ❌ Error in model loading startup: {e}")
|
| 480 |
import traceback
|
| 481 |
logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
|
|
|
|
| 588 |
logger.info(f"[STARTUP] ✅ Model loaded successfully on attempt {attempt}")
|
| 589 |
return status_text, gr.update(interactive=is_ready), gr.update(interactive=is_ready)
|
| 590 |
else:
|
| 591 |
+
# Check if status text indicates quota error
|
| 592 |
+
if status_text and ("quota" in status_text.lower() or "ZeroGPU" in status_text or
|
| 593 |
+
"429" in status_text or "runnning out" in status_text.lower() or
|
| 594 |
+
"running out" in status_text.lower()):
|
| 595 |
+
if attempt < max_retries:
|
| 596 |
+
delay = base_delay * attempt
|
| 597 |
+
logger.warning(f"[STARTUP] Quota error detected in status, retrying in {delay} seconds...")
|
| 598 |
+
time.sleep(delay)
|
| 599 |
+
continue
|
| 600 |
+
else:
|
| 601 |
+
# Quota exhausted after retries - allow user to proceed, model will load on-demand
|
| 602 |
+
status_msg = "⚠️ ZeroGPU quota exhausted.\n⏳ Model will load automatically when you send a message.\n💡 You can also select a model from the dropdown."
|
| 603 |
+
logger.info("[STARTUP] Quota exhausted after retries - allowing user to proceed with on-demand loading")
|
| 604 |
+
return status_msg, gr.update(interactive=True), gr.update(interactive=True)
|
| 605 |
# Model didn't load, but no exception - might be a state issue
|
| 606 |
logger.warning(f"[STARTUP] Model not ready after attempt {attempt}, but no error")
|
| 607 |
if attempt < max_retries:
|
|
|
|
| 610 |
time.sleep(delay)
|
| 611 |
continue
|
| 612 |
else:
|
| 613 |
+
# Even if model didn't load, allow user to try selecting another model
|
| 614 |
+
return status_text + "\n⚠️ Model not loaded. Please select a model from dropdown.", gr.update(interactive=True), gr.update(interactive=True)
|
| 615 |
except Exception as e:
|
| 616 |
error_msg = str(e)
|
| 617 |
is_quota_error = ("429" in error_msg or "Too Many Requests" in error_msg or
|
| 618 |
+
"quota" in error_msg.lower() or "ZeroGPU" in error_msg or
|
| 619 |
+
"runnning out" in error_msg.lower() or "running out" in error_msg.lower())
|
| 620 |
|
| 621 |
if is_quota_error and attempt < max_retries:
|
| 622 |
delay = base_delay * attempt # Exponential backoff: 5s, 10s, 15s
|
|
|
|
| 630 |
logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
|
| 631 |
|
| 632 |
if is_quota_error:
|
| 633 |
+
# If quota exhausted, allow user to proceed - model will load on-demand
|
| 634 |
+
error_display = "⚠️ ZeroGPU quota exhausted.\n⏳ Model will load automatically when you send a message.\n💡 You can also select a model from the dropdown."
|
| 635 |
+
logger.info("[STARTUP] Quota exhausted - allowing user to proceed with on-demand loading")
|
| 636 |
+
return error_display, gr.update(interactive=True), gr.update(interactive=True)
|
| 637 |
else:
|
| 638 |
error_display = f"⚠️ Startup error: {str(e)[:100]}"
|
| 639 |
+
if attempt >= max_retries:
|
| 640 |
+
logger.error(f"[STARTUP] Failed after {max_retries} attempts")
|
| 641 |
+
return error_display, gr.update(interactive=False), gr.update(interactive=False)
|
|
|
|
| 642 |
|
| 643 |
# Should not reach here, but just in case
|
| 644 |
+
return "⚠️ Startup failed after retries. Please select a model from dropdown.", gr.update(interactive=True), gr.update(interactive=True)
|
| 645 |
|
| 646 |
demo.load(
|
| 647 |
fn=load_startup_and_update_ui,
|