Y Phung Nguyen committed on
Commit
acc39fd
·
1 Parent(s): 020a4b5

Fix GPU quota error

Browse files
Files changed (1) hide show
  1. ui.py +39 -12
ui.py CHANGED
@@ -461,12 +461,21 @@ def create_demo():
461
 
462
  except Exception as e:
463
  error_msg = str(e)
464
- # Check if it's a ZeroGPU quota/rate limit error - re-raise for retry
465
- if ("429" in error_msg or "Too Many Requests" in error_msg or
466
  "quota" in error_msg.lower() or "ZeroGPU" in error_msg or
467
- "runnning out" in error_msg.lower() or "running out" in error_msg.lower()):
 
 
468
  logger.warning(f"[STARTUP] ZeroGPU quota/rate limit error detected: {error_msg[:100]}")
469
- raise # Re-raise to trigger retry logic in wrapper
 
 
 
 
 
 
 
470
  logger.error(f"[STARTUP] ❌ Error in model loading startup: {e}")
471
  import traceback
472
  logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
@@ -579,6 +588,20 @@ def create_demo():
579
  logger.info(f"[STARTUP] ✅ Model loaded successfully on attempt {attempt}")
580
  return status_text, gr.update(interactive=is_ready), gr.update(interactive=is_ready)
581
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
  # Model didn't load, but no exception - might be a state issue
583
  logger.warning(f"[STARTUP] Model not ready after attempt {attempt}, but no error")
584
  if attempt < max_retries:
@@ -587,11 +610,13 @@ def create_demo():
587
  time.sleep(delay)
588
  continue
589
  else:
590
- return status_text, gr.update(interactive=False), gr.update(interactive=False)
 
591
  except Exception as e:
592
  error_msg = str(e)
593
  is_quota_error = ("429" in error_msg or "Too Many Requests" in error_msg or
594
- "quota" in error_msg.lower() or "ZeroGPU" in error_msg)
 
595
 
596
  if is_quota_error and attempt < max_retries:
597
  delay = base_delay * attempt # Linear backoff: 5s, 10s, 15s
@@ -605,16 +630,18 @@ def create_demo():
605
  logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
606
 
607
  if is_quota_error:
608
- error_display = "⚠️ ZeroGPU quota/rate limit reached. Please wait or try again later."
 
 
 
609
  else:
610
  error_display = f"⚠️ Startup error: {str(e)[:100]}"
611
-
612
- if attempt >= max_retries:
613
- logger.error(f"[STARTUP] Failed after {max_retries} attempts")
614
- return error_display, gr.update(interactive=False), gr.update(interactive=False)
615
 
616
  # Should not reach here, but just in case
617
- return "⚠️ Startup failed after retries", gr.update(interactive=False), gr.update(interactive=False)
618
 
619
  demo.load(
620
  fn=load_startup_and_update_ui,
 
461
 
462
  except Exception as e:
463
  error_msg = str(e)
464
+ # Check if it's a ZeroGPU quota/rate limit error
465
+ is_quota_error = ("429" in error_msg or "Too Many Requests" in error_msg or
466
  "quota" in error_msg.lower() or "ZeroGPU" in error_msg or
467
+ "runnning out" in error_msg.lower() or "running out" in error_msg.lower())
468
+
469
+ if is_quota_error:
470
  logger.warning(f"[STARTUP] ZeroGPU quota/rate limit error detected: {error_msg[:100]}")
471
+ # Return status message indicating quota error (will be handled by retry logic)
472
+ status_messages.append("⚠️ ZeroGPU quota error - will retry")
473
+ status_text = "\n".join(status_messages)
474
+ # Also add ASR status
475
+ if WHISPER_AVAILABLE:
476
+ status_text += "\n⏳ ASR (Whisper): will load on first use"
477
+ return status_text # Return status instead of raising, let wrapper handle retry
478
+
479
  logger.error(f"[STARTUP] ❌ Error in model loading startup: {e}")
480
  import traceback
481
  logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
 
588
  logger.info(f"[STARTUP] ✅ Model loaded successfully on attempt {attempt}")
589
  return status_text, gr.update(interactive=is_ready), gr.update(interactive=is_ready)
590
  else:
591
+ # Check if status text indicates quota error
592
+ if status_text and ("quota" in status_text.lower() or "ZeroGPU" in status_text or
593
+ "429" in status_text or "runnning out" in status_text.lower() or
594
+ "running out" in status_text.lower()):
595
+ if attempt < max_retries:
596
+ delay = base_delay * attempt
597
+ logger.warning(f"[STARTUP] Quota error detected in status, retrying in {delay} seconds...")
598
+ time.sleep(delay)
599
+ continue
600
+ else:
601
+ # Quota exhausted after retries - allow user to proceed, model will load on-demand
602
+ status_msg = "⚠️ ZeroGPU quota exhausted.\n⏳ Model will load automatically when you send a message.\n💡 You can also select a model from the dropdown."
603
+ logger.info("[STARTUP] Quota exhausted after retries - allowing user to proceed with on-demand loading")
604
+ return status_msg, gr.update(interactive=True), gr.update(interactive=True)
605
  # Model didn't load, but no exception - might be a state issue
606
  logger.warning(f"[STARTUP] Model not ready after attempt {attempt}, but no error")
607
  if attempt < max_retries:
 
610
  time.sleep(delay)
611
  continue
612
  else:
613
+ # Even if model didn't load, allow user to try selecting another model
614
+ return status_text + "\n⚠️ Model not loaded. Please select a model from dropdown.", gr.update(interactive=True), gr.update(interactive=True)
615
  except Exception as e:
616
  error_msg = str(e)
617
  is_quota_error = ("429" in error_msg or "Too Many Requests" in error_msg or
618
+ "quota" in error_msg.lower() or "ZeroGPU" in error_msg or
619
+ "runnning out" in error_msg.lower() or "running out" in error_msg.lower())
620
 
621
  if is_quota_error and attempt < max_retries:
622
  delay = base_delay * attempt # Linear backoff: 5s, 10s, 15s
 
630
  logger.debug(f"[STARTUP] Full traceback: {traceback.format_exc()}")
631
 
632
  if is_quota_error:
633
+ # If quota exhausted, allow user to proceed - model will load on-demand
634
+ error_display = "⚠️ ZeroGPU quota exhausted.\n⏳ Model will load automatically when you send a message.\n💡 You can also select a model from the dropdown."
635
+ logger.info("[STARTUP] Quota exhausted - allowing user to proceed with on-demand loading")
636
+ return error_display, gr.update(interactive=True), gr.update(interactive=True)
637
  else:
638
  error_display = f"⚠️ Startup error: {str(e)[:100]}"
639
+ if attempt >= max_retries:
640
+ logger.error(f"[STARTUP] Failed after {max_retries} attempts")
641
+ return error_display, gr.update(interactive=False), gr.update(interactive=False)
 
642
 
643
  # Should not reach here, but just in case
644
+ return "⚠️ Startup failed after retries. Please select a model from dropdown.", gr.update(interactive=True), gr.update(interactive=True)
645
 
646
  demo.load(
647
  fn=load_startup_and_update_ui,