cosmosai471 committed
Commit be95ded · verified · 1 Parent(s): 127e5e0

Update app.py

Files changed (1)
  1. app.py +112 -90
app.py CHANGED
@@ -19,7 +19,6 @@ import numpy as np
 # --- CONFIGURATION & INITIALIZATION ---
 # Use string 'cpu' or GPU index string/int like '0'
 USER_DEVICE = "cpu" # keep as "cpu" on CPU-only hosts; change to "0" for GPU 0
-# convert to values pipelines expect
 PIPELINE_DEVICE = -1 if str(USER_DEVICE).lower() == "cpu" else int(USER_DEVICE)
 TORCH_DEVICE = torch.device("cuda") if torch.cuda.is_available() and PIPELINE_DEVICE != -1 else torch.device("cpu")
 
@@ -35,7 +34,6 @@ REPO_ID = "cosmosai471/Luna-v3"
 MODEL_FILE = "luna.gguf"
 LOCAL_MODEL_PATH = MODEL_FILE
 
-# Note: model is expected to prefix outputs with [Intent: ...][Confidence: ...] but user wants those strictly hidden.
 SYSTEM_PROMPT = (
     "You are Luna, a helpful and friendly AI assistant. For internal tracing you may place Intent/Confidence tags, "
     "but DO NOT expose these tags in the user-facing response. Any Intent/Confidence/Action metadata must be kept internal."
@@ -46,13 +44,16 @@ CONFIDENCE_THRESHOLD = 30 # trigger web-search fallback only under this
 STREAM_CHAR_LIMIT = 35000 # cap streaming characters
 STREAM_ITER_LIMIT = 20000 # cap streaming iterations
 MIN_MEANINGFUL_LENGTH = 20 # min length for file-generation prompts
+IMAGE_MAX_SIDE = 1024 # resize images to this max side before sending to image pipeline
 
+# safe destructor for Llama objects
 def safe_del(self):
     try:
         if hasattr(self, "close") and callable(self.close):
             self.close()
     except Exception:
         pass
+
 Llama.__del__ = safe_del
 
 # --- MODEL LOADING ---
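The patched destructor above guards llama-cpp-python's teardown so a failing close() never raises during garbage collection. A minimal standalone sketch of the same pattern (the Dummy class is hypothetical, for illustration only):

# Sketch: a guarded close() in __del__ avoids noisy tracebacks when
# objects are collected at interpreter shutdown.
class Dummy:  # hypothetical stand-in for llama_cpp.Llama
    def close(self):
        print("resources released")

def safe_del(self):
    try:
        if hasattr(self, "close") and callable(self.close):
            self.close()
    except Exception:
        pass  # never raise from a destructor

Dummy.__del__ = safe_del
d = Dummy()
del d  # prints "resources released" instead of risking an exception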
@@ -103,15 +104,14 @@ try:
 except Exception as e:
     print(f"⚠️ Could not load Image Generation pipeline. Image generation disabled. Error: {e}")
 
-
-# --- STRICT SANITIZERS & UTILITIES ---
+# --- SANITIZERS & UTILITIES ---
 
 def simulate_recording_delay():
     time.sleep(3)
     return None
 
 def remove_bracketed_tags(text: str) -> str:
-    """Remove bracketed tags like [Intent: ...] [Confidence: ...] exactly."""
+    """Remove bracketed tags like [Intent: ...] [Confidence: ...] exactly (safe)."""
     if not text:
         return ""
     text = re.sub(r'\[Intent:\s*[\w\-\_]+\]', '', text, flags=re.IGNORECASE)
@@ -119,8 +119,8 @@ def remove_bracketed_tags(text: str) -> str:
     text = re.sub(r'\[Action:\s*[^\]]+\]', '', text, flags=re.IGNORECASE)
     return text
 
-def remove_plain_labels_lines(text: str) -> str:
-    """Remove plain lines or labels like 'Intent : ...', 'Confidence: 99', 'Action: ...'."""
+def remove_plain_tag_lines(text: str) -> str:
+    """Remove whole lines that are just 'Intent: ...' or 'Confidence: ...' preserving inline content."""
    if not text:
        return ""
    text = re.sub(r'(?im)^\s*Intent\s*[:\-]\s*.*$', '', text)
@@ -128,56 +128,48 @@ def remove_plain_labels_lines(text: str) -> str:
     text = re.sub(r'(?im)^\s*Action\s*[:\-]\s*.*$', '', text)
     return text
 
-def remove_word_number_patterns(text: str) -> str:
-    """
-    Remove patterns like 'greeting 99 2. goodbye 99' or 'greeting 99' or 'label 100 0'.
-    This aggressively strips sequences of word tokens followed by small-digit numbers.
-    """
+def remove_word_number_dumps(text: str) -> str:
+    """Remove big classifier dumps like 'greeting 99 2. goodbye 99' but try to preserve normal text.
+    This removes sequences where a word token is followed immediately by 1-3 numbers and repeats (likely classifier logs).
+    Only removes when they appear as standalone clusters (surrounded by line breaks or punctuation)."""
     if not text:
         return ""
-    # Pattern: word (letters, underscore, hyphen) followed by one or more numbers (1-3 digits),
-    # possibly repeated and possibly followed by punctuation.
-    # We will repeatedly remove such occurrences until nothing matches to avoid chained dumps.
-    pattern = re.compile(r'\b[a-zA-Z_\-]{2,40}\b(?:\s+\d{1,3}\b)+', flags=re.IGNORECASE)
-    prev = None
-    new = text
-    # iterative removal to handle multiple occurrences
-    while prev != new:
-        prev = new
-        new = pattern.sub('', new)
-    # also remove isolated numeric sequences that remain on their own
-    new = re.sub(r'\b\d{1,3}(?:\s+\d{1,3})*\b', '', new)
-    return new
-
-def collapse_whitespace_and_punct(text: str) -> str:
+    # find clusters between line boundaries or punctuation
+    cluster_pattern = re.compile(r'(?:\n|^|[\(\[\{\.;:,\-\|>])\s*([a-zA-Z_\-]{2,40}(?:\s+\d{1,3}){1,4}(?:\s+[a-zA-Z_\-]{2,40}(?:\s+\d{1,3}){1,4})*)\s*(?:\n|$|[\)\]\}\.;:,\-\|<])', flags=re.IGNORECASE)
+    def _strip_cluster(m):
+        return '\n' # replace cluster with a newline to preserve sentence boundaries
+    text = cluster_pattern.sub(_strip_cluster, text)
+    # remove leftover isolated numeric sequences (only small groups)
+    text = re.sub(r'\b\d{2,3}(?:\s+\d{1,3})*\b', '', text)
+    return text
+
+def collapse_whitespace(text: str) -> str:
     if not text:
         return ""
-    # collapse multiple newlines and trim whitespace
     text = re.sub(r'\n\s*\n+', '\n\n', text)
-    # remove excessive spaces
     text = re.sub(r'[ \t]{2,}', ' ', text)
-    # trim leading/trailing
     return text.strip()
 
-def strict_sanitize_for_ui(raw: str) -> str:
+def moderate_sanitize_for_ui(raw: str) -> str:
     """
-    The final strict sanitizer that ensures NOTHING resembling Intent/Confidence/Action/
-    word-number dumps reaches the UI.
+    Moderate sanitizer: removes bracketed tags, whole tag-lines, and classifier dumps (carefully),
+    but otherwise preserves natural language content.
     """
     if not raw:
         return ""
     s = raw
     s = remove_bracketed_tags(s)
-    s = remove_plain_labels_lines(s)
-    s = remove_word_number_patterns(s)
-    s = collapse_whitespace_and_punct(s)
-    # final guard to remove leftover tokens like 'Intent' or 'Confidence' anywhere
+    s = remove_plain_tag_lines(s)
+    s = remove_word_number_dumps(s)
+    s = collapse_whitespace(s)
+    # final quick guard to remove exact words 'Intent' or 'Confidence' if accidentally left alone
     s = re.sub(r'(?i)\bIntent\b', '', s)
     s = re.sub(r'(?i)\bConfidence\b', '', s)
     s = re.sub(r'(?i)\bAction\b', '', s)
-    s = collapse_whitespace_and_punct(s)
+    s = collapse_whitespace(s)
     return s.strip()
 
+# web-search stub
 def web_search_tool(query: str) -> str:
     time.sleep(1.2)
     print(f"Simulating Google Search fallback for: {query}")
@@ -186,10 +178,9 @@ def check_confidence_and_augment(raw_response_with_tags: str, prompt: str) -> st
 def check_confidence_and_augment(raw_response_with_tags: str, prompt: str) -> str:
     """
     Internal: parse confidence if present (for logic only), but never display it. If fallback triggered,
-    append web results to sanitized response.
+    append web results to sanitized response. Uses moderate sanitizer to avoid eating valid content.
     """
-    cleaned_for_logic = remove_bracketed_tags(raw_response_with_tags) # keep for length heuristic
-    # extract confidence if any (internal only)
+    cleaned_for_logic = remove_bracketed_tags(raw_response_with_tags)
     confidence_match = re.search(r'\[Confidence:\s*([0-9]{1,3})\]', raw_response_with_tags, flags=re.IGNORECASE)
     if confidence_match:
         try:
@@ -198,32 +189,38 @@ def check_confidence_and_augment(raw_response_with_tags: str, prompt: str) -> st
         except Exception:
             confidence_score = 0
     else:
-        # heuristic
-        cleaned_no_tags = strict_sanitize_for_ui(cleaned_for_logic)
+        cleaned_no_tags = moderate_sanitize_for_ui(cleaned_for_logic)
         confidence_score = 10 if not cleaned_no_tags or len(cleaned_no_tags) < 30 else 85
 
-    # If low, augment with web snippet; final output is always sanitized
     if confidence_score < CONFIDENCE_THRESHOLD:
         print(f"[internal] Low confidence ({confidence_score}%) detected -> using web fallback")
         supplement = web_search_tool(prompt)
-        out = strict_sanitize_for_ui(cleaned_for_logic)
+        out = moderate_sanitize_for_ui(cleaned_for_logic)
         if not out:
-            out = "I couldn't generate a reliable answer. " + strict_sanitize_for_ui(supplement)
+            out = "I couldn't generate a reliable answer. " + moderate_sanitize_for_ui(supplement)
         else:
-            out = out + "\n\n" + strict_sanitize_for_ui(supplement)
+            out = out + "\n\n" + moderate_sanitize_for_ui(supplement)
     else:
-        out = strict_sanitize_for_ui(cleaned_for_logic)
+        out = moderate_sanitize_for_ui(cleaned_for_logic)
 
-    # final guard: don't return empty
     out = out or "Sorry — I couldn't produce a good answer. Could you rephrase or give more details?"
     return out
 
-# --- IMAGE / VQA PROCESSING (robust) ---
+# --- IMAGE / VQA PROCESSING (robust + resize) ---
+
+def _resize_image_keep_aspect(img: Image.Image, max_side: int) -> Image.Image:
+    w, h = img.size
+    if max(w, h) <= max_side:
+        return img
+    scale = max_side / float(max(w, h))
+    new_w = int(w * scale)
+    new_h = int(h * scale)
+    return img.resize((new_w, new_h), Image.LANCZOS)
 
 def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
     """
-    Uses image_pipe to produce VQA text. Returns prompt-injection (safe) + success flag.
-    This function is robust to multiple pipeline return shapes.
+    Uses image_pipe to produce VQA text. Resizes image to avoid token/feature mismatch issues.
+    Returns prompt-injection (safe) + success flag.
     """
     global image_pipe
     success = False
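The confidence gate operates on the raw, pre-sanitization text. A self-contained sketch of the decision logic, using the threshold and heuristic values from this file:

import re

CONFIDENCE_THRESHOLD = 30

def parse_confidence(raw: str) -> int:
    """Sketch: prefer an explicit [Confidence: NN] tag; otherwise fall back to
    the length heuristic (10 for short/empty text, 85 for substantial text)."""
    m = re.search(r'\[Confidence:\s*([0-9]{1,3})\]', raw, flags=re.IGNORECASE)
    if m:
        return int(m.group(1))
    stripped = re.sub(r'\[[^\]]*\]', '', raw).strip()
    return 10 if len(stripped) < 30 else 85

print(parse_confidence("[Confidence: 12] Not sure."))  # 12 -> below 30, web fallback fires
print(parse_confidence("A detailed, tag-free answer long enough to trust."))  # 85 -> no fallback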
@@ -237,7 +234,6 @@ def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
        elif isinstance(image_data_or_path, np.ndarray):
            image = Image.fromarray(image_data_or_path).convert("RGB")
        else:
-            # bytes or file-like
            try:
                image = Image.open(BytesIO(image_data_or_path)).convert("RGB")
            except Exception:
@@ -246,10 +242,13 @@ def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
        if image is None:
            return f"[Image Processing Error: Could not open image.] **User Query:** {message}", False
 
+        # Resize defensively before passing to VLM pipeline (fixes token/features mismatch errors)
+        image = _resize_image_keep_aspect(image, IMAGE_MAX_SIDE)
+
        vqa_prompt = f"USER: <image>\n{message}\nASSISTANT:"
-        # defensive calls: some pipelines accept prompt kwarg, some don't
        results = None
        try:
+            # preferred signature
            results = image_pipe(image, prompt=vqa_prompt)
        except TypeError:
            try:
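The resize helper keeps the aspect ratio by scaling the longest side down to IMAGE_MAX_SIDE, leaving smaller images untouched. An illustrative call, assuming the helper added in this commit is in scope (dimensions chosen so the scale factor is exact):

from PIL import Image

img = Image.new("RGB", (4096, 3072))  # oversized synthetic input
# scale = 1024 / 4096 = 0.25, so both sides shrink by 4x
out = _resize_image_keep_aspect(img, 1024)
print(out.size)  # (1024, 768); inputs already <= 1024 px pass through unchanged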
@@ -280,9 +279,8 @@ def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
        except Exception:
            raw_text = ""
 
-        # pick assistant section if available
        vqa_response = raw_text.split("ASSISTANT:")[-1].strip() if raw_text else ""
-        vqa_response = strict_sanitize_for_ui(vqa_response)
+        vqa_response = moderate_sanitize_for_ui(vqa_response)
 
        if not vqa_response or len(vqa_response) < 10:
            vqa_response = (
@@ -293,13 +291,12 @@ def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
        else:
            success = True
 
-        # Return safe, sanitized prompt injection for LLM
-        prompt_injection = f"**VQA Analysis:** {vqa_response}\n\n**User Query:** {strict_sanitize_for_ui(message)}"
+        prompt_injection = f"**VQA Analysis:** {vqa_response}\n\n**User Query:** {moderate_sanitize_for_ui(message)}"
        return prompt_injection, success
 
    except Exception as e:
        print(f"Image processing exception: {e}")
-        return f"[Image Processing Error: {e}] **User Query:** {strict_sanitize_for_ui(message)}", False
+        return f"[Image Processing Error: {e}] **User Query:** {moderate_sanitize_for_ui(message)}", False
 
 # --- AUDIO / TTS ---
 
@@ -322,6 +319,7 @@ def transcribe_audio(audio_file_path: str) -> Tuple[str, str, gr.update, gr.upda
        error_msg = f"Transcription Error: {e}"
        return "", error_msg, gr.update(interactive=True), gr.update(value="↑", interactive=True, elem_classes=["circle-btn", "send-mode"]), False, gr.update(visible=False)
 
+
 def text_to_audio(text: str, is_voice_chat: bool) -> str or None:
    if not is_voice_chat:
        return None
@@ -337,7 +335,7 @@ def text_to_audio(text: str, is_voice_chat: bool) -> str or None:
        return None
    return None
 
-# --- INTENT STATUS MAP (internal only) ---
+# --- INTENT MAP & PARSING ---
 INTENT_STATUS_MAP = {
    "code_generate": "Analyzing requirements and drafting code 💻...",
    "code_explain": "Reviewing code logic and writing explanation 💡...",
@@ -352,22 +350,47 @@ INTENT_STATUS_MAP = {
    "default": "Luna is thinking...",
 }
 
+# Additional keyword-based intent inference (helps when model doesn't include tags)
+INTENT_KEYWORD_MAP = [
+    (re.compile(r"\b(create|generate|make)\b.*\b(image|picture|photo|art)\b", flags=re.IGNORECASE), "image_generate"),
+    (re.compile(r"\b(create|generate|make)\b.*\b(document|doc|report|letter|resume)\b", flags=re.IGNORECASE), "doc_generate"),
+    (re.compile(r"\b(create|generate|make)\b.*\b(presentation|ppt|slides)\b", flags=re.IGNORECASE), "ppt_generate"),
+]
+
+
+def infer_intent_from_content(text: str) -> str:
+    if not text:
+        return "default"
+    for patt, intent in INTENT_KEYWORD_MAP:
+        if patt.search(text):
+            return intent
+    return "default"
+
+
 def get_intent_status(raw_response: str, is_vqa_flow: bool) -> Tuple[str, str, str]:
    """
    Internal parsing: returns (intent, status, cleaned_display_text).
-    cleaned_display_text is strictly sanitized for UI (no tags, no word-number dumps).
+    cleaned_display_text preserves content but strips tags/garbage moderately.
+    If no explicit [Intent:] tag is found, infer intent from content_for_tool keywords.
    """
    intent_match = re.search(r'\[Intent:\s*([\w\-\_]+)\]', raw_response, re.IGNORECASE)
-    intent = intent_match.group(1).lower() if intent_match else "default"
+    intent = intent_match.group(1).lower() if intent_match else None
    if is_vqa_flow:
        intent = "vqa"
 
-    # produce sanitized display text
-    cleaned_text = strict_sanitize_for_ui(raw_response)
+    cleaned_text = moderate_sanitize_for_ui(raw_response)
+    # If no explicit intent from tags, try to infer from cleaned_text
+    if not intent or intent == "default":
+        inferred = infer_intent_from_content(cleaned_text)
+        if inferred != "default":
+            intent = inferred
+    intent = intent or "default"
+
    status = INTENT_STATUS_MAP.get(intent, INTENT_STATUS_MAP["default"])
    return intent, status, cleaned_text
 
 # --- FILE / IMAGE GENERATION ---
+
 def generate_file_content(content: str, history: List[Dict[str, str]], file_type: str):
    file_path = None
    try:
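With the keyword fallback in place, untagged responses can still route to tool intents. Illustrative calls, assuming the functions above are in scope:

print(infer_intent_from_content("Sure, I can make an image of a sunset."))
# -> "image_generate" (matches the create/generate/make + image pattern)
print(infer_intent_from_content("Here's a summary of the article."))
# -> "default" (no tool keywords, so the normal chat path is used)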
@@ -417,13 +440,13 @@ def generate_file_content(content: str, history: List[Dict[str, str]], file_type
    return history, file_path
 
 # --- CORE GENERATOR FUNCTION ---
+
 def chat_generator(message_from_input: str, image_input_data: Any, history: List[Dict[str, str]], stop_signal: bool, is_voice_chat: bool) -> Any:
    """
-    - assistant entry is appended only when generation actually starts (no empty box).
-    - streaming is strictly sanitized at every step.
-    - intent/confidence/action NEVER reach UI (aggressively stripped).
+    - assistant entry appended only when generation actually starts (no empty box).
+    - streaming sanitized moderately to keep meaning while removing metadata.
+    - when image is attached, VQA flow is strictly used (image model output injected to LLM).
    """
-    # Expect last entry to be user
    if not history or history[-1]['role'] != 'user':
        yield history, False, "Error: Generator called in unexpected state (no user message found).", gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=False), image_input_data, gr.update(), gr.update()
        return
@@ -431,7 +454,7 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
    last_user_index = len(history) - 1
    original_message = history[last_user_index]['content'] or ""
 
-    # detect VQA flow
+    # detect VQA flow: if image attached, force image flow
    is_vqa_flow = False
    if isinstance(image_input_data, str):
        is_vqa_flow = bool(image_input_data)
@@ -444,8 +467,8 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
    llm_input_message = original_message
    if is_vqa_flow:
        processed_message, vqa_success = process_image(image_input_data, original_message)
-        # annotate user's recorded message but preserve original for logic
-        history[last_user_index]['content'] = f"[IMAGE RECEIVED] {strict_sanitize_for_ui(original_message)}"
+        history[last_user_index]['content'] = f"[IMAGE RECEIVED] {moderate_sanitize_for_ui(original_message)}"
+        # ensure that LLM prompt includes VQA analysis and the user message
        llm_input_message = processed_message
 
    # build prompt
@@ -459,11 +482,10 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
        prompt += f"USER: {content}\n"
    prompt += f"USER: {llm_input_message}\nLUNA: "
 
-    # append assistant entry only now
+    # append assistant entry now
    assistant_initial_text = "✨ Luna is starting to think..."
    history.append({"role": "assistant", "content": assistant_initial_text})
 
-    # initial UI hint
    yield history, stop_signal, assistant_initial_text, gr.update(value="", interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
    time.sleep(0.12)
@@ -479,11 +501,11 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
        )
    except Exception as e:
        err = f"❌ Error generating response: {e}"
-        history[-1]['content'] = strict_sanitize_for_ui(err)
+        history[-1]['content'] = moderate_sanitize_for_ui(err)
        yield history, False, err, gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=False), image_input_data, gr.update(), gr.update()
        return
 
-    # stream tokens; strictly sanitize and cap
+    # stream tokens; moderately sanitize and cap
    try:
        for output in stream:
            iter_count += 1
@@ -502,10 +524,8 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
                print("Stream truncated by char limit.")
                break
 
-            # extract intent/status for internal UX hints but NEVER show tags
            current_intent, current_hint, interim = get_intent_status(full_response, is_vqa_flow and vqa_success)
-            # ALWAYS sanitize interim for UI
-            interim_ui = strict_sanitize_for_ui(interim)
+            interim_ui = moderate_sanitize_for_ui(interim)
            if not interim_ui:
                interim_ui = "✨ Luna is forming a reply..."
            history[-1]['content'] = interim_ui
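The two caps referenced here bound runaway generations independently: one on iteration count, one on accumulated characters. A standalone sketch of that control flow, using the limits defined at the top of the file:

STREAM_CHAR_LIMIT = 35000
STREAM_ITER_LIMIT = 20000

def bounded_stream(chunks):
    full, iters = "", 0
    for chunk in chunks:
        iters += 1
        full += chunk
        yield full
        if iters >= STREAM_ITER_LIMIT or len(full) >= STREAM_CHAR_LIMIT:
            break  # truncate rather than stall the UI

for partial in bounded_stream(["Luna ", "is ", "thinking..."]):
    print(partial)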
@@ -514,7 +534,7 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
 
    except Exception as e:
        _, _, salvage = get_intent_status(full_response, is_vqa_flow and vqa_success)
-        salvage_ui = strict_sanitize_for_ui(salvage) or f"⚠️ Streaming interrupted: {e}"
+        salvage_ui = moderate_sanitize_for_ui(salvage) or f"⚠️ Streaming interrupted: {e}"
        history[-1]['content'] = salvage_ui
        yield history, False, f"⚠️ Streaming interrupted: {e}", gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=True), image_input_data, gr.update(), gr.update()
        return
@@ -522,7 +542,7 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
    # post-process
    file_download_path = None
    _, _, content_for_tool = get_intent_status(full_response, is_vqa_flow and vqa_success)
-    content_for_tool = strict_sanitize_for_ui(content_for_tool)
+    content_for_tool = moderate_sanitize_for_ui(content_for_tool)
 
    if current_intent == "image_generate":
        if not content_for_tool or len(content_for_tool.strip()) < MIN_MEANINGFUL_LENGTH:
@@ -549,19 +569,17 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
        history, file_download_path = generate_file_content(content_for_tool, history, "ppt")
 
    elif current_intent == "open_google":
-        final_text = (content_for_tool or "").strip() + "\n\n🔗 **Action:** [Search Google](https://www.google.com/search?q=" + re.sub(r'\s+', '+', strict_sanitize_for_ui(original_message)) + ")"
-        history[-1]['content'] = strict_sanitize_for_ui(final_text)
+        final_text = (content_for_tool or "").strip() + "\n\n🔗 **Action:** [Search Google](https://www.google.com/search?q=" + re.sub(r'\s+', '+', moderate_sanitize_for_ui(original_message)) + ")"
+        history[-1]['content'] = moderate_sanitize_for_ui(final_text)
 
    elif current_intent == "open_camera":
        final_text = (content_for_tool or "").strip() + "\n\n📸 **Action:** Use the 'Google Lens' button to capture an image."
-        history[-1]['content'] = strict_sanitize_for_ui(final_text)
+        history[-1]['content'] = moderate_sanitize_for_ui(final_text)
 
    else:
-        # normal path: evaluate confidence and maybe augment
        final_response_content = check_confidence_and_augment(full_response, original_message)
        history[-1]['content'] = final_response_content
 
-    # final defensive fallback
    if not history[-1]['content'] or not str(history[-1]['content']).strip():
        history[-1]['content'] = "Sorry — I couldn't produce a useful response. Could you rephrase or add details?"
@@ -576,6 +594,7 @@ def toggle_menu(current_visibility: bool) -> Tuple[bool, gr.update, gr.update, g
    new_visibility = not current_visibility
    return new_visibility, gr.update(visible=new_visibility), gr.update(visible=False), gr.update(value="⬇️" if new_visibility else "➕")
 
+
 def user_turn(user_message: str, chat_history: List[Dict[str, str]], staged_image_input: Any) -> Tuple[str, List[Dict[str, str]]]:
    has_text = bool(user_message and user_message.strip())
    has_image = False
@@ -589,22 +608,24 @@ def user_turn(user_message: str, chat_history: List[Dict[str, str]], staged_imag
    if not has_text and not has_image:
        return user_message, chat_history
 
-    # prevent double send if assistant currently thinking
    if chat_history and chat_history[-1]['role'] == 'assistant' and chat_history[-1]['content'] and "thinking" in chat_history[-1]['content'].lower():
        return user_message, chat_history
 
    user_message_to_add = "Analyzing Staged Media." if (not has_text and has_image) else user_message.strip()
-    chat_history.append({"role": "user", "content": strict_sanitize_for_ui(user_message_to_add)})
+    chat_history.append({"role": "user", "content": moderate_sanitize_for_ui(user_message_to_add)})
    return "", chat_history
 
+
 def stage_file_upload(file_path: str) -> Tuple[Any, str, gr.update, gr.update]:
    if file_path:
        return file_path, f"📎 File staged: {os.path.basename(file_path)}. Click send (✈️).", gr.update(value="", interactive=True), gr.update(interactive=False)
    return None, "File upload cancelled.", gr.update(value="", interactive=True), gr.update(interactive=False)
 
+
 def clear_staged_media() -> gr.update:
    return gr.update(value=None)
 
+
 def manual_fact_check(history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str, gr.update]:
    if not history or not history[-1]['content']:
        return history, "Error: No final response to check.", gr.update(visible=False)
@@ -617,9 +638,10 @@ def manual_fact_check(history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str
        return history, "Error: Could not find query.", gr.update(visible=False)
    web_results = web_search_tool(last_user_prompt)
    new_history = list(history)
-    new_history[-1]['content'] += "\n\n" + strict_sanitize_for_ui(web_results)
+    new_history[-1]['content'] += "\n\n" + moderate_sanitize_for_ui(web_results)
    return new_history, "✅ Double-checked with web facts.", gr.update(visible=False)
 
+
 def auto_capture_camera(user_message: str, chat_history: List[Dict[str, str]], staged_image_input: Any) -> Tuple[str, List[Dict[str, str]], Any, gr.update, gr.update, gr.update, gr.update, gr.update]:
    _, chat_history = user_turn(user_message, chat_history, staged_image_input)
    if chat_history and chat_history[-1]['role'] == 'assistant' and chat_history[-1]['content'] == "":
@@ -627,13 +649,13 @@ def auto_capture_camera(user_message: str, chat_history: List[Dict[str, str]], s
    return "", chat_history, staged_image_input, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value="📸 Capturing in 3 seconds...", interactive=False), gr.update(value="➕")
 
 # --- GRADIO UI ---
-with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
+with gr.Blocks(theme=gr.themes.Soft(), title="Prototype") as demo:
    stop_signal = gr.State(value=False)
    is_voice_chat = gr.State(value=False)
    staged_image = gr.State(value=None)
    menu_visible_state = gr.State(value=False)
 
-    gr.HTML("<h1 style='text-align: center; color: #4B0082;'>Prototype</h1>")
+    gr.HTML("<h1 style='text-align: center; color: #4B0082;'>🌙 Prototype</h1>")
 
    hint_box = gr.Textbox(value="Ask anything", lines=1, show_label=False, interactive=False, placeholder="Luna's Action...", visible=True)
    file_download_output = gr.File(label="Generated File", visible=False)