Spaces: Running on Zero
Y Phung Nguyen committed · Commit d8e18ef · Parent: 8515412
Fix BodyStreamBuffer err
ui.py CHANGED

@@ -750,7 +750,10 @@ def create_demo():
         request = MockRequest()

         # Model is loaded, proceed with stream_chat (no model loading here to save time)
+        # Note: We handle "BodyStreamBuffer was aborted" errors by catching stream disconnections
+        # and not attempting to yield after the client has disconnected
         last_result = None
+        stream_aborted = False
         try:
             for result in stream_chat(
                 message, history, system_prompt, temperature, max_new_tokens,
@@ -759,11 +762,40 @@ def create_demo():
                 enable_clinical_intake, disable_agentic_reasoning, show_thoughts, request
             ):
                 last_result = result
-                yield result
+                try:
+                    yield result
+                except (GeneratorExit, StopIteration, RuntimeError) as stream_error:
+                    # Stream was closed/aborted by client - don't try to yield again
+                    error_msg_lower = str(stream_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream was aborted by client, stopping gracefully")
+                        stream_aborted = True
+                        break
+                    raise
+        except (GeneratorExit, StopIteration) as stream_exit:
+            # Stream was closed - this is normal, just log and exit
+            logger.info(f"[UI] Stream closed normally")
+            stream_aborted = True
+            return
         except Exception as e:
             # Handle any errors gracefully
             error_str = str(e)
             error_msg_lower = error_str.lower()
+
+            # Check if this is a stream abort error
+            is_stream_abort = (
+                'bodystreambuffer' in error_msg_lower or
+                'stream' in error_msg_lower and 'abort' in error_msg_lower or
+                'connection' in error_msg_lower and 'abort' in error_msg_lower or
+                isinstance(e, (GeneratorExit, StopIteration, RuntimeError)) and 'abort' in error_msg_lower
+            )
+
+            if is_stream_abort:
+                logger.info(f"[UI] Stream was aborted (BodyStreamBuffer or similar): {error_str[:100]}")
+                stream_aborted = True
+                # If we have a result, it was already yielded, so just return
+                return
+
             is_gpu_timeout = 'gpu task aborted' in error_msg_lower or 'timeout' in error_msg_lower

             logger.error(f"Error in stream_chat_with_model_check: {error_str}")
@@ -789,16 +821,35 @@ def create_demo():
             except Exception as parse_error:
                 logger.debug(f"Error parsing last_result: {parse_error}")

+            # If stream was aborted, don't try to yield - just return
+            if stream_aborted:
+                logger.info(f"[UI] Stream was aborted, not yielding final result")
+                return
+
             # If we have a valid answer, use it (don't show error message)
             if has_valid_answer:
                 logger.info(f"[UI] Error occurred but final answer already generated, displaying it without error message")
-                yield last_result
+                try:
+                    yield last_result
+                except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
+                    error_msg_lower = str(yield_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream aborted while yielding final result, ignoring")
+                    else:
+                        raise
                 return

             # For GPU timeouts, try to use last result even if it's partial
             if is_gpu_timeout and last_result is not None:
                 logger.info(f"[UI] GPU timeout occurred, using last available result")
-                yield last_result
+                try:
+                    yield last_result
+                except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
+                    error_msg_lower = str(yield_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream aborted while yielding timeout result, ignoring")
+                    else:
+                        raise
                 return

             # Only show error for non-timeout errors when we have no valid answer
@@ -806,12 +857,26 @@ def create_demo():
             if is_gpu_timeout:
                 logger.info(f"[UI] GPU timeout with no result, showing empty assistant message")
                 updated_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": ""}]
-                yield updated_history, ""
+                try:
+                    yield updated_history, ""
+                except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
+                    error_msg_lower = str(yield_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream aborted while yielding empty message, ignoring")
+                    else:
+                        raise
             else:
                 # For other errors, show minimal error message only if no result
                 error_display = f"⚠️ An error occurred: {error_str[:200]}"
                 updated_history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": error_display}]
-                yield updated_history, ""
+                try:
+                    yield updated_history, ""
+                except (GeneratorExit, StopIteration, RuntimeError) as yield_error:
+                    error_msg_lower = str(yield_error).lower()
+                    if 'abort' in error_msg_lower or 'stream' in error_msg_lower or 'buffer' in error_msg_lower:
+                        logger.info(f"[UI] Stream aborted while yielding error message, ignoring")
+                    else:
+                        raise

     submit_button.click(
         fn=stream_chat_with_model_check,
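
The pattern this commit applies is worth seeing in isolation: when the consumer of a Python generator disconnects (as when a browser aborts the response body and the error surfaces as "BodyStreamBuffer was aborted"), GeneratorExit is raised at the suspended yield, and yielding again afterwards raises RuntimeError("generator ignored GeneratorExit"). Below is a minimal, self-contained sketch of the guarded-yield idea; the function stream_tokens and its token list are hypothetical stand-ins for illustration, not part of ui.py.

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    def stream_tokens(tokens):
        # Hypothetical stand-in for a streaming handler: yields results one at
        # a time and stops cleanly if the consumer closes the stream.
        aborted = False
        for token in tokens:
            try:
                yield token
            except GeneratorExit:
                # Consumer disconnected mid-stream. Do NOT yield again here,
                # or Python raises RuntimeError("generator ignored GeneratorExit").
                aborted = True
                break
        if aborted:
            logger.info("Stream aborted by consumer, skipping final yield")
            return
        logger.info("Stream completed normally")

    gen = stream_tokens(["partial", "answer", "tokens"])
    print(next(gen))  # consume one item: prints "partial"
    gen.close()       # simulates the client aborting the stream; the generator logs and exits cleanly

The commit's broader structure follows the same idea: a stream_aborted flag is set wherever a disconnect is detected, and checked before every later yield so the handler never writes to a closed stream.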