Update app.py

app.py CHANGED
@@ -17,7 +17,12 @@ from io import BytesIO
import numpy as np

# --- CONFIGURATION & INITIALIZATION ---
-
+# Use string 'cpu' or GPU index string/int like '0'
+USER_DEVICE = "cpu"  # keep as "cpu" on CPU-only hosts; change to "0" for GPU 0
+# convert to values pipelines expect
+PIPELINE_DEVICE = -1 if str(USER_DEVICE).lower() == "cpu" else int(USER_DEVICE)
+TORCH_DEVICE = torch.device("cuda") if torch.cuda.is_available() and PIPELINE_DEVICE != -1 else torch.device("cpu")
+
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
AUDIO_DIR = "audio_outputs"
DOC_DIR = "doc_outputs"
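A quick aside on the two device values added above, since they follow different conventions: `transformers.pipeline` takes an integer index (`-1` meaning CPU, `0` the first GPU), while torch/diffusers modules take a `torch.device`. A minimal, illustrative sketch of that mapping (not part of the committed file):

import torch
from transformers import pipeline

USER_DEVICE = "cpu"  # or "0" to target the first GPU
PIPELINE_DEVICE = -1 if str(USER_DEVICE).lower() == "cpu" else int(USER_DEVICE)
TORCH_DEVICE = torch.device("cuda") if torch.cuda.is_available() and PIPELINE_DEVICE != -1 else torch.device("cpu")

# transformers pipelines consume the integer form ...
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=PIPELINE_DEVICE)
# ... while torch modules are moved with the torch.device form
print(asr.device, TORCH_DEVICE)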
@@ -25,20 +30,22 @@ if not os.path.exists(AUDIO_DIR):
    os.makedirs(AUDIO_DIR)
if not os.path.exists(DOC_DIR):
    os.makedirs(DOC_DIR)
+
REPO_ID = "cosmosai471/Luna-v3"
MODEL_FILE = "luna.gguf"
LOCAL_MODEL_PATH = MODEL_FILE
+
+# Note: model is expected to prefix outputs with [Intent: ...][Confidence: ...] but user wants those strictly hidden.
SYSTEM_PROMPT = (
-    "You are Luna, a helpful and friendly AI assistant.
-    "
-    "Your full response must follow these tags."
+    "You are Luna, a helpful and friendly AI assistant. For internal tracing you may place Intent/Confidence tags, "
+    "but DO NOT expose these tags in the user-facing response. Any Intent/Confidence/Action metadata must be kept internal."
)

# --- TUNABLES / GUARDS ---
-CONFIDENCE_THRESHOLD = 30  #
-STREAM_CHAR_LIMIT = 35000  #
-STREAM_ITER_LIMIT = 20000  #
-MIN_MEANINGFUL_LENGTH = 20  #
+CONFIDENCE_THRESHOLD = 30  # trigger web-search fallback only under this confidence
+STREAM_CHAR_LIMIT = 35000  # cap streaming characters
+STREAM_ITER_LIMIT = 20000  # cap streaming iterations
+MIN_MEANINGFUL_LENGTH = 20  # min length for file-generation prompts

def safe_del(self):
    try:

@@ -69,71 +76,107 @@ except Exception as e:
    print(f"❌ Error loading Luna model: {e}")
    class DummyLLM:
        def create_completion(self, *args, **kwargs):
-            # yield one piece to mimic streaming
            yield {'choices': [{'text': '[Intent: qa_general][Confidence: 0] ERROR: Luna model failed to load. Check logs and resources.'}]}
    llm = DummyLLM()

+# transformer's pipeline expects device int: -1 for CPU
stt_pipe = None
try:
-    stt_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=
-    print(f"✅ Loaded Whisper-base on device: {
+    stt_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=PIPELINE_DEVICE)
+    print(f"✅ Loaded Whisper-base on device: {USER_DEVICE}")
except Exception as e:
    print(f"⚠️ Could not load Whisper. Voice chat disabled. Error: {e}")

image_pipe = None
try:
    VLM_MODEL_ID = "llava-hf/llava-1.5-7b-hf"
-    image_pipe = pipeline("image-to-text", model=VLM_MODEL_ID, device=
-    print(f"✅ Loaded {VLM_MODEL_ID} for image processing.")
+    image_pipe = pipeline("image-to-text", model=VLM_MODEL_ID, device=PIPELINE_DEVICE)
+    print(f"✅ Loaded {VLM_MODEL_ID} for image processing (device={USER_DEVICE}).")
except Exception as e:
    print(f"⚠️ Could not load VLM ({VLM_MODEL_ID}). Image chat disabled. Error: {e}")

img_gen_pipe = None
try:
    img_gen_pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32)
-    img_gen_pipe.to(
-    print("✅ Loaded Stable Diffusion
+    img_gen_pipe.to(TORCH_DEVICE)
+    print(f"✅ Loaded Stable Diffusion and moved to {TORCH_DEVICE}.")
except Exception as e:
    print(f"⚠️ Could not load Image Generation pipeline. Image generation disabled. Error: {e}")


-# ---
+# --- STRICT SANITIZERS & UTILITIES ---

def simulate_recording_delay():
    time.sleep(3)
    return None

-def
-    """Remove
-    This ensures tags never leak into the UI.
-    """
+def remove_bracketed_tags(text: str) -> str:
+    """Remove bracketed tags like [Intent: ...] [Confidence: ...] exactly."""
    if not text:
        return ""
-    # remove bracketed tags like [Intent: xyz] [Confidence: 85]
    text = re.sub(r'\[Intent:\s*[\w\-\_]+\]', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\[Confidence:\s*\d{1,3}\]', '', text, flags=re.IGNORECASE)
-
-    text = re.sub(r'(?im)^\s*Intent:\s*.*$', '', text)
-    text = re.sub(r'(?im)^\s*Confidence:\s*.*$', '', text)
-    # collapse multiple blank lines
-    text = re.sub(r'\n\s*\n+', '\n\n', text).strip()
+    text = re.sub(r'\[Action:\s*[^\]]+\]', '', text, flags=re.IGNORECASE)
    return text

-def
-    """
-    if not
+def remove_plain_labels_lines(text: str) -> str:
+    """Remove plain lines or labels like 'Intent : ...', 'Confidence: 99', 'Action: ...'."""
+    if not text:
+        return ""
+    text = re.sub(r'(?im)^\s*Intent\s*[:\-]\s*.*$', '', text)
+    text = re.sub(r'(?im)^\s*Confidence\s*[:\-]\s*.*$', '', text)
+    text = re.sub(r'(?im)^\s*Action\s*[:\-]\s*.*$', '', text)
+    return text
+
+def remove_word_number_patterns(text: str) -> str:
+    """
+    Remove patterns like 'greeting 99 2. goodbye 99' or 'greeting 99' or 'label 100 0'.
+    This aggressively strips sequences of word tokens followed by small-digit numbers.
+    """
+    if not text:
        return ""
-    #
-
-    #
-
-
-    #
-
-
-
-
+    # Pattern: word (letters, underscore, hyphen) followed by one or more numbers (1-3 digits),
+    # possibly repeated and possibly followed by punctuation.
+    # We will repeatedly remove such occurrences until nothing matches to avoid chained dumps.
+    pattern = re.compile(r'\b[a-zA-Z_\-]{2,40}\b(?:\s+\d{1,3}\b)+', flags=re.IGNORECASE)
+    prev = None
+    new = text
+    # iterative removal to handle multiple occurrences
+    while prev != new:
+        prev = new
+        new = pattern.sub('', new)
+    # also remove isolated numeric sequences that remain on their own
+    new = re.sub(r'\b\d{1,3}(?:\s+\d{1,3})*\b', '', new)
+    return new
+
+def collapse_whitespace_and_punct(text: str) -> str:
+    if not text:
+        return ""
+    # collapse multiple newlines and trim whitespace
+    text = re.sub(r'\n\s*\n+', '\n\n', text)
+    # remove excessive spaces
+    text = re.sub(r'[ \t]{2,}', ' ', text)
+    # trim leading/trailing
+    return text.strip()
+
+def strict_sanitize_for_ui(raw: str) -> str:
+    """
+    The final strict sanitizer that ensures NOTHING resembling Intent/Confidence/Action/
+    word-number dumps reaches the UI.
+    """
+    if not raw:
+        return ""
+    s = raw
+    s = remove_bracketed_tags(s)
+    s = remove_plain_labels_lines(s)
+    s = remove_word_number_patterns(s)
+    s = collapse_whitespace_and_punct(s)
+    # final guard to remove leftover tokens like 'Intent' or 'Confidence' anywhere
+    s = re.sub(r'(?i)\bIntent\b', '', s)
+    s = re.sub(r'(?i)\bConfidence\b', '', s)
+    s = re.sub(r'(?i)\bAction\b', '', s)
+    s = collapse_whitespace_and_punct(s)
+    return s.strip()

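Illustrative only, assuming the sanitizer chain is defined exactly as in this hunk — a tiny check of what it is meant to guarantee for a raw, tag-prefixed model chunk:

raw = "[Intent: qa_general][Confidence: 87] Here is the answer.\nConfidence: 87"
print(strict_sanitize_for_ui(raw))  # -> "Here is the answer."  (bracketed tags and plain label lines are stripped)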
def web_search_tool(query: str) -> str:
|
| 182 |
time.sleep(1.2)
|
|
|
|
| 184 |
return f"\n\n🌐 **Web Search Results for '{query}':** I found supplemental info to help answer this."
|
| 185 |
|
| 186 |
def check_confidence_and_augment(raw_response_with_tags: str, prompt: str) -> str:
|
| 187 |
+
"""
|
| 188 |
+
Internal: parse confidence if present (for logic only), but never display it. If fallback triggered,
|
| 189 |
+
append web results to sanitized response.
|
| 190 |
+
"""
|
| 191 |
+
cleaned_for_logic = remove_bracketed_tags(raw_response_with_tags) # keep for length heuristic
|
| 192 |
+
# extract confidence if any (internal only)
|
| 193 |
confidence_match = re.search(r'\[Confidence:\s*([0-9]{1,3})\]', raw_response_with_tags, flags=re.IGNORECASE)
|
|
|
|
|
|
|
| 194 |
if confidence_match:
|
| 195 |
try:
|
| 196 |
confidence_score = int(confidence_match.group(1))
|
|
|
|
| 198 |
except Exception:
|
| 199 |
confidence_score = 0
|
| 200 |
else:
|
| 201 |
+
# heuristic
|
| 202 |
+
cleaned_no_tags = strict_sanitize_for_ui(cleaned_for_logic)
|
| 203 |
+
confidence_score = 10 if not cleaned_no_tags or len(cleaned_no_tags) < 30 else 85
|
| 204 |
|
| 205 |
+
# If low, augment with web snippet; final output is always sanitized
|
| 206 |
if confidence_score < CONFIDENCE_THRESHOLD:
|
| 207 |
+
print(f"[internal] Low confidence ({confidence_score}%) detected -> using web fallback")
|
| 208 |
+
supplement = web_search_tool(prompt)
|
| 209 |
+
out = strict_sanitize_for_ui(cleaned_for_logic)
|
| 210 |
+
if not out:
|
| 211 |
+
out = "I couldn't generate a reliable answer. " + strict_sanitize_for_ui(supplement)
|
| 212 |
else:
|
| 213 |
+
out = out + "\n\n" + strict_sanitize_for_ui(supplement)
|
| 214 |
else:
|
| 215 |
+
out = strict_sanitize_for_ui(cleaned_for_logic)
|
| 216 |
|
| 217 |
+
# final guard: don't return empty
|
| 218 |
+
out = out or "Sorry — I couldn't produce a good answer. Could you rephrase or give more details?"
|
| 219 |
+
return out
|
| 220 |
|
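Again illustrative, assuming the constants and functions above: a reply tagged below CONFIDENCE_THRESHOLD picks up the web-search supplement, while a confident one is returned as-is, with the tags stripped in both cases:

low = "[Intent: qa_general][Confidence: 12] I am not sure."
high = "[Intent: qa_general][Confidence: 91] Paris is the capital of France."
print(check_confidence_and_augment(low, "capital of France"))   # sanitized text plus the web-search block
print(check_confidence_and_augment(high, "capital of France"))  # -> "Paris is the capital of France."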
-
-    """Perform VQA via the image_pipe. Robust to different pipeline return types.
+# --- IMAGE / VQA PROCESSING (robust) ---

-
-
-
+def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
+    """
+    Uses image_pipe to produce VQA text. Returns prompt-injection (safe) + success flag.
+    This function is robust to multiple pipeline return shapes.
    """
    global image_pipe
    success = False
    if image_pipe is None:
-        return f"[Image Processing Error: VLM model not loaded.] **User Query:** {message}",
+        return f"[Image Processing Error: VLM model not loaded.] **User Query:** {message}", False

    image = None
    try:

@@ -190,22 +237,21 @@ def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
        elif isinstance(image_data_or_path, np.ndarray):
            image = Image.fromarray(image_data_or_path).convert("RGB")
        else:
-            #
+            # bytes or file-like
            try:
                image = Image.open(BytesIO(image_data_or_path)).convert("RGB")
            except Exception:
                image = None

        if image is None:
-            return f"[Image Processing Error: Could not open image.] **User Query:** {message}",
+            return f"[Image Processing Error: Could not open image.] **User Query:** {message}", False

        vqa_prompt = f"USER: <image>\n{message}\nASSISTANT:"
-        #
+        # defensive calls: some pipelines accept prompt kwarg, some don't
        results = None
        try:
            results = image_pipe(image, prompt=vqa_prompt)
        except TypeError:
-            # fallback signature
            try:
                results = image_pipe(image)
            except Exception as e:

@@ -215,52 +261,50 @@ def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
                print(f"Image pipeline call error: {e}")
                results = None

-
+        raw_text = ""
        if results is None:
-
+            raw_text = ""
        elif isinstance(results, dict):
-
-            raw_vlm_output = results.get('generated_text') or results.get('text') or ""
+            raw_text = results.get("generated_text") or results.get("text") or ""
        elif isinstance(results, list):
-            # list of dicts or strings
            first = results[0]
            if isinstance(first, dict):
-
+                raw_text = first.get("generated_text") or first.get("text") or ""
            elif isinstance(first, str):
-
+                raw_text = first
        elif isinstance(results, str):
-
+            raw_text = results
        else:
-            # unknown shape -> convert to string safe
            try:
-
+                raw_text = str(results)
            except Exception:
-
+                raw_text = ""

-        #
-        vqa_response =
+        # pick assistant section if available
+        vqa_response = raw_text.split("ASSISTANT:")[-1].strip() if raw_text else ""
+        vqa_response = strict_sanitize_for_ui(vqa_response)

-        # If no meaningful vqa_response, return a helpful fallback message
        if not vqa_response or len(vqa_response) < 10:
            vqa_response = (
-                "VQA analysis
-                "Please
+                "VQA analysis didn't return a clear answer. The image might be unclear or the question ambiguous. "
+                "Please re-upload a clearer image, crop to the subject, or give a short instruction about what you'd like answered."
            )
            success = False
        else:
            success = True

-        #
-
-        prompt_injection = f"**VQA Analysis:** {vqa_response}\n\n**User Query:** {message}"
+        # Return safe, sanitized prompt injection for LLM
+        prompt_injection = f"**VQA Analysis:** {vqa_response}\n\n**User Query:** {strict_sanitize_for_ui(message)}"
        return prompt_injection, success

    except Exception as e:
-        print(f"Image
-        return f"[Image Processing Error: {e}] **User Query:** {message}",
+        print(f"Image processing exception: {e}")
+        return f"[Image Processing Error: {e}] **User Query:** {strict_sanitize_for_ui(message)}", False
+
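One illustrative shape, since the normalization above has to cope with several: image-to-text pipelines commonly return a list of dicts carrying a 'generated_text' field, which the list branch and the ASSISTANT: split reduce to the answer text. The values below are made up:

results = [{"generated_text": "USER:  \nWhat is in the photo?\nASSISTANT: a red bicycle leaning against a wall"}]
first = results[0]
raw_text = first.get("generated_text") or first.get("text") or ""
print(raw_text.split("ASSISTANT:")[-1].strip())  # -> "a red bicycle leaning against a wall"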
+# --- AUDIO / TTS ---

def transcribe_audio(audio_file_path: str) -> Tuple[str, str, gr.update, gr.update, bool, gr.update]:
-    if stt_pipe is None or audio_file_path
+    if stt_pipe is None or not audio_file_path:
        error_msg = "Error: Whisper model failed to load or no audio recorded."
        return "", error_msg, gr.update(interactive=True), gr.update(value="↑", interactive=True, elem_classes=["circle-btn", "send-mode"]), False, gr.update(visible=False)
    try:

@@ -293,6 +337,7 @@ def text_to_audio(text: str, is_voice_chat: bool) -> str or None:
        return None
    return None

+# --- INTENT STATUS MAP (internal only) ---
INTENT_STATUS_MAP = {
    "code_generate": "Analyzing requirements and drafting code 💻...",
    "code_explain": "Reviewing code logic and writing explanation 💡...",

@@ -308,21 +353,22 @@ INTENT_STATUS_MAP = {
}

def get_intent_status(raw_response: str, is_vqa_flow: bool) -> Tuple[str, str, str]:
-    """
-
+    """
+    Internal parsing: returns (intent, status, cleaned_display_text).
+    cleaned_display_text is strictly sanitized for UI (no tags, no word-number dumps).
    """
    intent_match = re.search(r'\[Intent:\s*([\w\-\_]+)\]', raw_response, re.IGNORECASE)
    intent = intent_match.group(1).lower() if intent_match else "default"
    if is_vqa_flow:
        intent = "vqa"

-    #
-    cleaned_text =
+    # produce sanitized display text
+    cleaned_text = strict_sanitize_for_ui(raw_response)
    status = INTENT_STATUS_MAP.get(intent, INTENT_STATUS_MAP["default"])
    return intent, status, cleaned_text

+# --- FILE / IMAGE GENERATION ---
def generate_file_content(content: str, history: List[Dict[str, str]], file_type: str):
-    """Generates a file and writes it to disk. If content insufficient, asks for clarification."""
    file_path = None
    try:
        if not content or len(content.strip()) < MIN_MEANINGFUL_LENGTH:

@@ -370,15 +416,14 @@ def generate_file_content(content: str, history: List[Dict[str, str]], file_type
        file_path = None
    return history, file_path

-
# --- CORE GENERATOR FUNCTION ---
def chat_generator(message_from_input: str, image_input_data: Any, history: List[Dict[str, str]], stop_signal: bool, is_voice_chat: bool) -> Any:
    """
-    -
-    -
-    -
+    - assistant entry is appended only when generation actually starts (no empty box).
+    - streaming is strictly sanitized at every step.
+    - intent/confidence/action NEVER reach UI (aggressively stripped).
    """
-    #
+    # Expect last entry to be user
    if not history or history[-1]['role'] != 'user':
        yield history, False, "Error: Generator called in unexpected state (no user message found).", gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=False), image_input_data, gr.update(), gr.update()
        return

@@ -386,10 +431,10 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
    last_user_index = len(history) - 1
    original_message = history[last_user_index]['content'] or ""

-    #
+    # detect VQA flow
    is_vqa_flow = False
    if isinstance(image_input_data, str):
-        is_vqa_flow = image_input_data
+        is_vqa_flow = bool(image_input_data)
    elif isinstance(image_input_data, np.ndarray):
        is_vqa_flow = image_input_data.size > 0
    else:

@@ -399,29 +444,28 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
    llm_input_message = original_message
    if is_vqa_flow:
        processed_message, vqa_success = process_image(image_input_data, original_message)
-        #
-        history[last_user_index]['content'] = f"[IMAGE RECEIVED] {original_message}"
+        # annotate user's recorded message but preserve original for logic
+        history[last_user_index]['content'] = f"[IMAGE RECEIVED] {strict_sanitize_for_ui(original_message)}"
        llm_input_message = processed_message

-    #
+    # build prompt
    prompt = f"SYSTEM: {SYSTEM_PROMPT}\n"
    for item in history[:-1]:
        role = item['role'].upper()
        content = item['content'] or ""
        if role == "ASSISTANT":
-            # ensure assistant content used in prompt still includes tags if model expects them (we don't alter)
            prompt += f"LUNA: {content}\n"
        elif role == "USER":
            prompt += f"USER: {content}\n"
    prompt += f"USER: {llm_input_message}\nLUNA: "

-    #
+    # append assistant entry only now
    assistant_initial_text = "✨ Luna is starting to think..."
    history.append({"role": "assistant", "content": assistant_initial_text})

-    #
+    # initial UI hint
    yield history, stop_signal, assistant_initial_text, gr.update(value="", interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
-    time.sleep(0.
+    time.sleep(0.12)

    full_response = ""
    current_intent = "default"

@@ -434,19 +478,18 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
            echo=False, stream=True, temperature=0.7
        )
    except Exception as e:
-
-        history[-1]['content'] =
-        yield history, False,
+        err = f"❌ Error generating response: {e}"
+        history[-1]['content'] = strict_sanitize_for_ui(err)
+        yield history, False, err, gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=False), image_input_data, gr.update(), gr.update()
        return

-    #
+    # stream tokens; strictly sanitize and cap
    try:
        for output in stream:
            iter_count += 1
            if iter_count > STREAM_ITER_LIMIT:
-
-
-                print("Stream aborted: iteration limit reached.")
+                full_response += "\n\n[Stream aborted: iteration limit reached]"
+                print("Stream aborted by iter limit.")
                break

            token = output["choices"][0].get("text", "")

@@ -454,41 +497,36 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
                token = str(token)
            full_response += token

-            # safety cap on characters to prevent runaway printing
            if len(full_response) > STREAM_CHAR_LIMIT:
-                full_response = full_response[:STREAM_CHAR_LIMIT] + "\n\n[Truncated:
-                print("Stream truncated
+                full_response = full_response[:STREAM_CHAR_LIMIT] + "\n\n[Truncated: length limit reached]"
+                print("Stream truncated by char limit.")
                break

-            #
-            current_intent, current_hint,
-
-
-
-
-
-            cleaned_display = "✨ Luna is forming a reply..."
-            history[-1]['content'] = cleaned_display
+            # extract intent/status for internal UX hints but NEVER show tags
+            current_intent, current_hint, interim = get_intent_status(full_response, is_vqa_flow and vqa_success)
+            # ALWAYS sanitize interim for UI
+            interim_ui = strict_sanitize_for_ui(interim)
+            if not interim_ui:
+                interim_ui = "✨ Luna is forming a reply..."
+            history[-1]['content'] = interim_ui

            yield history, stop_signal, current_hint, gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()

    except Exception as e:
-
-
-
-        history[-1]['content'] = final_clean
+        _, _, salvage = get_intent_status(full_response, is_vqa_flow and vqa_success)
+        salvage_ui = strict_sanitize_for_ui(salvage) or f"⚠️ Streaming interrupted: {e}"
+        history[-1]['content'] = salvage_ui
        yield history, False, f"⚠️ Streaming interrupted: {e}", gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=True), image_input_data, gr.update(), gr.update()
        return

-    #
+    # post-process
    file_download_path = None
    _, _, content_for_tool = get_intent_status(full_response, is_vqa_flow and vqa_success)
-    content_for_tool =
+    content_for_tool = strict_sanitize_for_ui(content_for_tool)

-    # Handle tool intents, but require sufficient content; otherwise ask for clarification
    if current_intent == "image_generate":
        if not content_for_tool or len(content_for_tool.strip()) < MIN_MEANINGFUL_LENGTH:
-            history[-1]['content'] = "I detected an image generation
+            history[-1]['content'] = "I detected an image generation request but didn't get enough details. Please give a short description (e.g. 'red bicycle at sunrise, vivid colors')."
        else:
            history[-1]['content'] = INTENT_STATUS_MAP[current_intent]
            yield history, stop_signal, history[-1]['content'], gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()

@@ -504,47 +542,41 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List

    elif current_intent == "ppt_generate":
        if not content_for_tool or len(content_for_tool.strip()) < MIN_MEANINGFUL_LENGTH:
-            history[-1]['content'] = "I can make a presentation — please give a title and 3–5 bullet points."
+            history[-1]['content'] = "I can make a short presentation — please give a title and 3–5 bullet points."
        else:
            history[-1]['content'] = INTENT_STATUS_MAP[current_intent]
            yield history, stop_signal, history[-1]['content'], gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
            history, file_download_path = generate_file_content(content_for_tool, history, "ppt")

    elif current_intent == "open_google":
-
-        history[-1]['content'] =
+        final_text = (content_for_tool or "").strip() + "\n\n🔗 **Action:** [Search Google](https://www.google.com/search?q=" + re.sub(r'\s+', '+', strict_sanitize_for_ui(original_message)) + ")"
+        history[-1]['content'] = strict_sanitize_for_ui(final_text)

    elif current_intent == "open_camera":
-
-        history[-1]['content'] =
+        final_text = (content_for_tool or "").strip() + "\n\n📸 **Action:** Use the 'Google Lens' button to capture an image."
+        history[-1]['content'] = strict_sanitize_for_ui(final_text)

    else:
-        #
+        # normal path: evaluate confidence and maybe augment
        final_response_content = check_confidence_and_augment(full_response, original_message)
        history[-1]['content'] = final_response_content

-    #
+    # final defensive fallback
    if not history[-1]['content'] or not str(history[-1]['content']).strip():
-        history[-1]['content'] = "Sorry — I couldn't produce a
+        history[-1]['content'] = "Sorry — I couldn't produce a useful response. Could you rephrase or add details?"

-    # convert to audio if requested
    audio_file_path = text_to_audio(history[-1]['content'], is_voice_chat)

    hint = "✅ Response generated."
    yield history, False, hint, gr.update(interactive=True), gr.update(value="↑", interactive=True), audio_file_path, False, gr.update(visible=True), gr.update(value=None), gr.update(), file_download_path

-
-# --- GRADIO WRAPPERS FOR UI ACTIONS ---
+# --- GRADIO WRAPPERS ---

def toggle_menu(current_visibility: bool) -> Tuple[bool, gr.update, gr.update, gr.update]:
    new_visibility = not current_visibility
    return new_visibility, gr.update(visible=new_visibility), gr.update(visible=False), gr.update(value="⬇️" if new_visibility else "➕")

def user_turn(user_message: str, chat_history: List[Dict[str, str]], staged_image_input: Any) -> Tuple[str, List[Dict[str, str]]]:
-    """
-    Appends only the USER message to chat_history. Assistant entry is appended inside chat_generator
-    once generation starts (avoids empty assistant box).
-    """
    has_text = bool(user_message and user_message.strip())
    has_image = False
    if isinstance(staged_image_input, str):

@@ -557,13 +589,12 @@ def user_turn(user_message: str, chat_history: List[Dict[str, str]], staged_imag
    if not has_text and not has_image:
        return user_message, chat_history

-    #
+    # prevent double send if assistant currently thinking
    if chat_history and chat_history[-1]['role'] == 'assistant' and chat_history[-1]['content'] and "thinking" in chat_history[-1]['content'].lower():
        return user_message, chat_history

    user_message_to_add = "Analyzing Staged Media." if (not has_text and has_image) else user_message.strip()
-    chat_history.append({"role": "user", "content": user_message_to_add})
-    # DO NOT append assistant here
+    chat_history.append({"role": "user", "content": strict_sanitize_for_ui(user_message_to_add)})
    return "", chat_history

def stage_file_upload(file_path: str) -> Tuple[Any, str, gr.update, gr.update]:

@@ -586,7 +617,7 @@ def manual_fact_check(history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str
        return history, "Error: Could not find query.", gr.update(visible=False)
    web_results = web_search_tool(last_user_prompt)
    new_history = list(history)
-    new_history[-1]['content'] += web_results
+    new_history[-1]['content'] += "\n\n" + strict_sanitize_for_ui(web_results)
    return new_history, "✅ Double-checked with web facts.", gr.update(visible=False)

def auto_capture_camera(user_message: str, chat_history: List[Dict[str, str]], staged_image_input: Any) -> Tuple[str, List[Dict[str, str]], Any, gr.update, gr.update, gr.update, gr.update, gr.update]:

@@ -595,15 +626,14 @@ def auto_capture_camera(user_message: str, chat_history: List[Dict[str, str]], s
    chat_history[-1]['content'] = "📸 Preparing camera capture..."
    return "", chat_history, staged_image_input, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value="📸 Capturing in 3 seconds...", interactive=False), gr.update(value="➕")

-
-# --- GRADIO INTERFACE ---
+# --- GRADIO UI ---
with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
    stop_signal = gr.State(value=False)
    is_voice_chat = gr.State(value=False)
    staged_image = gr.State(value=None)
    menu_visible_state = gr.State(value=False)

-    gr.HTML("<h1 style='text-align: center; color: #4B0082;'
+    gr.HTML("<h1 style='text-align: center; color: #4B0082;'>Prototype</h1>")

    hint_box = gr.Textbox(value="Ask anything", lines=1, show_label=False, interactive=False, placeholder="Luna's Action...", visible=True)
    file_download_output = gr.File(label="Generated File", visible=False)

@@ -635,17 +665,13 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:

    output_components = [chatbot, stop_signal, hint_box, txt, combined_btn, audio_output, is_voice_chat, fact_check_btn_row, staged_image, file_input, file_download_output]

-    #
-    btn_menu.click(
-        fn=toggle_menu, inputs=[menu_visible_state], outputs=[menu_visible_state, menu_options_row, fact_check_btn_row, btn_menu], queue=False
-    )
+    # wiring
+    btn_menu.click(fn=toggle_menu, inputs=[menu_visible_state], outputs=[menu_visible_state, menu_options_row, fact_check_btn_row, btn_menu], queue=False)

    def prepare_file_upload(): return gr.update(visible=False), gr.update(value="➕"), gr.update(visible=False), gr.update(interactive=True), gr.update(value="")
    btn_add_files.click(fn=prepare_file_upload, inputs=[], outputs=[menu_options_row, btn_menu, fact_check_btn_row, file_input, txt], queue=False)

-    file_input.change(
-        fn=stage_file_upload, inputs=[file_input], outputs=[staged_image, hint_box, txt, file_input], queue=False
-    )
+    file_input.change(fn=stage_file_upload, inputs=[file_input], outputs=[staged_image, hint_box, txt, file_input], queue=False)

    btn_take_photo.click(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), "📸 Camera Active. Capture an image.", gr.update(value="➕")),

@@ -677,24 +703,14 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:

    generator_inputs = [txt, staged_image, chatbot, stop_signal, is_voice_chat]

-    txt.submit(
-        fn=user_turn, inputs=[txt, chatbot, staged_image], outputs=[txt, chatbot], queue=False
-    ).then(
+    txt.submit(fn=user_turn, inputs=[txt, chatbot, staged_image], outputs=[txt, chatbot], queue=False).then(
        fn=chat_generator, inputs=generator_inputs, outputs=output_components, queue=True
-    ).then(
-        fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False
-    )
+    ).then(fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False)

-    combined_btn.click(
-        fn=user_turn, inputs=[txt, chatbot, staged_image], outputs=[txt, chatbot], queue=False
-    ).then(
+    combined_btn.click(fn=user_turn, inputs=[txt, chatbot, staged_image], outputs=[txt, chatbot], queue=False).then(
        fn=chat_generator, inputs=generator_inputs, outputs=output_components, queue=True
-    ).then(
-        fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False
-    )
+    ).then(fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False)

-    btn_fact_check.click(
-        fn=manual_fact_check, inputs=[chatbot], outputs=[chatbot, hint_box, fact_check_btn_row], queue=True
-    )
+    btn_fact_check.click(fn=manual_fact_check, inputs=[chatbot], outputs=[chatbot, hint_box, fact_check_btn_row], queue=True)

    demo.queue(max_size=20).launch(server_name="0.0.0.0")
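The submit/click wiring above relies on Gradio's event chaining: each event returns a dependency whose .then() runs the next step after the previous one finishes (user_turn, then chat_generator, then clear_staged_media). A self-contained toy sketch of the same pattern, with made-up component names rather than the ones in app.py:

import gradio as gr

with gr.Blocks() as toy:
    chat = gr.Chatbot()
    box = gr.Textbox()

    def add_user(msg, history):
        # step 1: record the user turn and clear the textbox (plays the role of user_turn)
        return "", history + [(msg, None)]

    def add_reply(history):
        # step 2: fill in the assistant side (stands in for chat_generator)
        history[-1] = (history[-1][0], "ok")
        return history

    box.submit(add_user, [box, chat], [box, chat], queue=False).then(add_reply, chat, chat)

toy.launch()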