Spaces: Running on Zero
Y Phung Nguyen committed · Commit d8e18ef · Parent: 8515412
Fix BodyStreamBuffer err
ui.py CHANGED

@@ -750,7 +750,10 @@ def create_demo():
         request = MockRequest()

         # Model is loaded, proceed with stream_chat (no model loading here to save time)
+        # Note: We handle "BodyStreamBuffer was aborted" errors by catching stream disconnections
+        # and not attempting to yield after the client has disconnected
         last_result = None
+        stream_aborted = False
         try:
             for result in stream_chat(
                 message, history, system_prompt, temperature, max_new_tokens,
@@ -759,11 +762,40 @@ def create_demo():
                 enable_clinical_intake, disable_agentic_reasoning, show_thoughts, request
             ):
                 last_result = result
-                yield result
+                try:
+                    yield result
+                except (GeneratorExit, StopIteration, RuntimeError) as stream_error:
+                    # Stream was closed/aborted by client - don't try to yield again
+                    error_msg_lower = str(stream_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream was aborted by client, stopping gracefully")
+                        stream_aborted = True
+                        break
+                    raise
+        except (GeneratorExit, StopIteration) as stream_exit:
+            # Stream was closed - this is normal, just log and exit
+            logger.info(f"[UI] Stream closed normally")
+            stream_aborted = True
+            return
         except Exception as e:
             # Handle any errors gracefully
             error_str = str(e)
             error_msg_lower = error_str.lower()
+
+            # Check if this is a stream abort error
+            is_stream_abort = (
+                'bodystreambuffer' in error_msg_lower or
+                'stream' in error_msg_lower and 'abort' in error_msg_lower or
+                'connection' in error_msg_lower and 'abort' in error_msg_lower or
+                isinstance(e, (GeneratorExit, StopIteration, RuntimeError)) and 'abort' in error_msg_lower
+            )
+
+            if is_stream_abort:
+                logger.info(f"[UI] Stream was aborted (BodyStreamBuffer or similar): {error_str[:100]}")
+                stream_aborted = True
+                # If we have a result, it was already yielded, so just return
+                return
+
             is_gpu_timeout = 'gpu task aborted' in error_msg_lower or 'timeout' in error_msg_lower

             logger.error(f"Error in stream_chat_with_model_check: {error_str}")
@@ -789,16 +821,35 @@ def create_demo():
             except Exception as parse_error:
                 logger.debug(f"Error parsing last_result: {parse_error}")

+            # If stream was aborted, don't try to yield - just return
+            if stream_aborted:
+                logger.info(f"[UI] Stream was aborted, not yielding final result")
+                return
+
             # If we have a valid answer, use it (don't show error message)
             if has_valid_answer:
                 logger.info(f"[UI] Error occurred but final answer already generated, displaying it without error message")
-                yield last_result
+                try:
+                    yield last_result
+                except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
+                    error_msg_lower = str(yield_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream aborted while yielding final result, ignoring")
+                    else:
+                        raise
                 return

             # For GPU timeouts, try to use last result even if it's partial
             if is_gpu_timeout and last_result is not None:
                 logger.info(f"[UI] GPU timeout occurred, using last available result")
-                yield last_result
+                try:
+                    yield last_result
+                except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
+                    error_msg_lower = str(yield_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream aborted while yielding timeout result, ignoring")
+                    else:
+                        raise
                 return

             # Only show error for non-timeout errors when we have no valid answer
@@ -806,12 +857,26 @@ def create_demo():
             if is_gpu_timeout:
                 logger.info(f"[UI] GPU timeout with no result, showing empty assistant message")
                 updated_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": ""}]
-                yield updated_history, ""
+                try:
+                    yield updated_history, ""
+                except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
+                    error_msg_lower = str(yield_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream aborted while yielding empty message, ignoring")
+                    else:
+                        raise
             else:
                 # For other errors, show minimal error message only if no result
                 error_display = f"⚠️ An error occurred: {error_str[:200]}"
                 updated_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": error_display}]
-                yield updated_history, ""
+                try:
+                    yield updated_history, ""
+                except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
+                    error_msg_lower = str(yield_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream aborted while yielding error message, ignoring")
+                    else:
+                        raise

     submit_button.click(
         fn=stream_chat_with_model_check,
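
The pattern this commit applies is worth seeing in isolation: when the consumer of a Python generator disconnects (as when a browser aborts the response body and the error surfaces as "BodyStreamBuffer was aborted"), GeneratorExit is raised at the suspended yield, and yielding again afterwards raises RuntimeError("generator ignored GeneratorExit"). Below is a minimal, self-contained sketch of the guarded-yield idea; the function stream_tokens and its token list are hypothetical stand-ins for illustration, not part of ui.py.

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    def stream_tokens(tokens):
        # Hypothetical stand-in for a streaming handler: yields results one at
        # a time and stops cleanly if the consumer closes the stream.
        aborted = False
        for token in tokens:
            try:
                yield token
            except GeneratorExit:
                # Consumer disconnected mid-stream. Do NOT yield again here,
                # or Python raises RuntimeError("generator ignored GeneratorExit").
                aborted = True
                break
        if aborted:
            logger.info("Stream aborted by consumer, skipping final yield")
            return
        logger.info("Stream completed normally")

    gen = stream_tokens(["partial", "answer", "tokens"])
    print(next(gen))  # consume one item: prints "partial"
    gen.close()       # simulates the client aborting the stream; the generator logs and exits cleanly

The commit's broader structure follows the same idea: a stream_aborted flag is set wherever a disconnect is detected, and checked before every later yield so the handler never writes to a closed stream.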