Y Phung Nguyen committed on
Commit
d8e18ef
·
1 Parent(s): 8515412

Fix BodyStreamBuffer err

Browse files
Files changed (1) hide show
  1. ui.py +70 -5
ui.py CHANGED
@@ -750,7 +750,10 @@ def create_demo():
750
  request = MockRequest()
751
 
752
  # Model is loaded, proceed with stream_chat (no model loading here to save time)
 
 
753
  last_result = None
 
754
  try:
755
  for result in stream_chat(
756
  message, history, system_prompt, temperature, max_new_tokens,
@@ -759,11 +762,40 @@ def create_demo():
759
  enable_clinical_intake, disable_agentic_reasoning, show_thoughts, request
760
  ):
761
  last_result = result
762
- yield result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  except Exception as e:
764
  # Handle any errors gracefully
765
  error_str = str(e)
766
  error_msg_lower = error_str.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
  is_gpu_timeout = 'gpu task aborted' in error_msg_lower or 'timeout' in error_msg_lower
768
 
769
  logger.error(f"Error in stream_chat_with_model_check: {error_str}")
@@ -789,16 +821,35 @@ def create_demo():
789
  except Exception as parse_error:
790
  logger.debug(f"Error parsing last_result: {parse_error}")
791
 
 
 
 
 
 
792
  # If we have a valid answer, use it (don't show error message)
793
  if has_valid_answer:
794
  logger.info(f"[UI] Error occurred but final answer already generated, displaying it without error message")
795
- yield last_result
 
 
 
 
 
 
 
796
  return
797
 
798
  # For GPU timeouts, try to use last result even if it's partial
799
  if is_gpu_timeout and last_result is not None:
800
  logger.info(f"[UI] GPU timeout occurred, using last available result")
801
- yield last_result
 
 
 
 
 
 
 
802
  return
803
 
804
  # Only show error for non-timeout errors when we have no valid answer
@@ -806,12 +857,26 @@ def create_demo():
806
  if is_gpu_timeout:
807
  logger.info(f"[UI] GPU timeout with no result, showing empty assistant message")
808
  updated_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": ""}]
809
- yield updated_history, ""
 
 
 
 
 
 
 
810
  else:
811
  # For other errors, show minimal error message only if no result
812
  error_display = f"⚠️ An error occurred: {error_str[:200]}"
813
  updated_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": error_display}]
814
- yield updated_history, ""
 
 
 
 
 
 
 
815
 
816
  submit_button.click(
817
  fn=stream_chat_with_model_check,
 
750
  request = MockRequest()
751
 
752
  # Model is loaded, proceed with stream_chat (no model loading here to save time)
753
+ # Note: We handle "BodyStreamBuffer was aborted" errors by catching stream disconnections
754
+ # and not attempting to yield after the client has disconnected
755
  last_result = None
756
+ stream_aborted = False
757
  try:
758
  for result in stream_chat(
759
  message, history, system_prompt, temperature, max_new_tokens,
 
762
  enable_clinical_intake, disable_agentic_reasoning, show_thoughts, request
763
  ):
764
  last_result = result
765
+ try:
766
+ yield result
767
+ except (GeneratorExit, StopIteration, RuntimeError) as stream_error:
768
+ # Stream was closed/aborted by client - don't try to yield again
769
+ error_msg_lower = str(stream_error).lower()
770
+ if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
771
+ logger.info(f"[UI] Stream was aborted by client, stopping gracefully")
772
+ stream_aborted = True
773
+ break
774
+ raise
775
+ except (GeneratorExit, StopIteration) as stream_exit:
776
+ # Stream was closed - this is normal, just log and exit
777
+ logger.info(f"[UI] Stream closed normally")
778
+ stream_aborted = True
779
+ return
780
  except Exception as e:
781
  # Handle any errors gracefully
782
  error_str = str(e)
783
  error_msg_lower = error_str.lower()
784
+
785
+ # Check if this is a stream abort error
786
+ is_stream_abort = (
787
+ 'bodystreambuffer' in error_msg_lower or
788
+ 'stream' in error_msg_lower and 'abort' in error_msg_lower or
789
+ 'connection' in error_msg_lower and 'abort' in error_msg_lower or
790
+ isinstance(e, (GeneratorExit, StopIteration, RuntimeError)) and 'abort' in error_msg_lower
791
+ )
792
+
793
+ if is_stream_abort:
794
+ logger.info(f"[UI] Stream was aborted (BodyStreamBuffer or similar): {error_str[:100]}")
795
+ stream_aborted = True
796
+ # If we have a result, it was already yielded, so just return
797
+ return
798
+
799
  is_gpu_timeout = 'gpu task aborted' in error_msg_lower or 'timeout' in error_msg_lower
800
 
801
  logger.error(f"Error in stream_chat_with_model_check: {error_str}")
 
821
  except Exception as parse_error:
822
  logger.debug(f"Error parsing last_result: {parse_error}")
823
 
824
+ # If stream was aborted, don't try to yield - just return
825
+ if stream_aborted:
826
+ logger.info(f"[UI] Stream was aborted, not yielding final result")
827
+ return
828
+
829
  # If we have a valid answer, use it (don't show error message)
830
  if has_valid_answer:
831
  logger.info(f"[UI] Error occurred but final answer already generated, displaying it without error message")
832
+ try:
833
+ yield last_result
834
+ except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
835
+ error_msg_lower = str(yield_error).lower()
836
+ if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
837
+ logger.info(f"[UI] Stream aborted while yielding final result, ignoring")
838
+ else:
839
+ raise
840
  return
841
 
842
  # For GPU timeouts, try to use last result even if it's partial
843
  if is_gpu_timeout and last_result is not None:
844
  logger.info(f"[UI] GPU timeout occurred, using last available result")
845
+ try:
846
+ yield last_result
847
+ except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
848
+ error_msg_lower = str(yield_error).lower()
849
+ if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
850
+ logger.info(f"[UI] Stream aborted while yielding timeout result, ignoring")
851
+ else:
852
+ raise
853
  return
854
 
855
  # Only show error for non-timeout errors when we have no valid answer
 
857
  if is_gpu_timeout:
858
  logger.info(f"[UI] GPU timeout with no result, showing empty assistant message")
859
  updated_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": ""}]
860
+ try:
861
+ yield updated_history, ""
862
+ except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
863
+ error_msg_lower = str(yield_error).lower()
864
+ if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
865
+ logger.info(f"[UI] Stream aborted while yielding empty message, ignoring")
866
+ else:
867
+ raise
868
  else:
869
  # For other errors, show minimal error message only if no result
870
  error_display = f"⚠️ An error occurred: {error_str[:200]}"
871
  updated_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": error_display}]
872
+ try:
873
+ yield updated_history, ""
874
+ except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
875
+ error_msg_lower = str(yield_error).lower()
876
+ if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
877
+ logger.info(f"[UI] Stream aborted while yielding error message, ignoring")
878
+ else:
879
+ raise
880
 
881
  submit_button.click(
882
  fn=stream_chat_with_model_check,